Release_v0.3/000077500000000000000000000000001223142177000132035ustar00rootroot00000000000000Release_v0.3/.gitignore000066400000000000000000000000741223142177000151740ustar00rootroot00000000000000*.o CMakeCache.txt CMakeFiles/ Makefile cmake_install.cmake Release_v0.3/CMake/000077500000000000000000000000001223142177000141635ustar00rootroot00000000000000Release_v0.3/CMake/CMakeConfigTemplate.hpp000066400000000000000000000017001223142177000204740ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #ifndef CMAKE_CONFIG_HPP #define CMAKE_CONFIG_HPP #define ON true #define OFF false #define GEN_INSTALLATION_PATH "${CMAKE_INSTALL_PREFIX}/lib/i965/" #endif /* CMAKE_CONFIG_HPP */ Release_v0.3/CMake/FindDRM.cmake000066400000000000000000000014131223142177000164070ustar00rootroot00000000000000# # Try to find the DRM library and include path.
# Once done this will define
#
# DRM_FOUND        - 1 when drm.h was located, 0 otherwise
# DRM_INCLUDE_PATH - directory that contains drm.h
# DRM_LIBRARY      - full path of the DRM library (looked up independently)
#

# Header lookup. The listed directories are extra guesses on top of the
# locations CMake searches by default.
find_path(DRM_INCLUDE_PATH drm.h
  ~/include/libdrm/
  /usr/include/libdrm/
  /usr/local/include/libdrm/
  /sw/include/libdrm/
  /opt/local/include/libdrm/
  DOC "The directory where drm.h resides")

find_library(DRM_LIBRARY
  NAMES DRM drm
  PATHS
  ~/lib/
  /usr/lib64
  /usr/lib
  /usr/local/lib64
  /usr/local/lib
  /sw/lib
  /opt/local/lib
  DOC "The DRM library")

# The found flag is keyed on the header only (DRM_LIBRARY is not consulted).
# FORCE is required: set(... CACHE) never overwrites an existing cache entry,
# so without it a 0 recorded by an earlier configure run would stay 0 even
# after the headers get installed.
if(DRM_INCLUDE_PATH)
  include_directories(${DRM_INCLUDE_PATH})
  set(DRM_FOUND 1 CACHE STRING "Set to 1 if DRM is found, 0 otherwise" FORCE)
else()
  set(DRM_FOUND 0 CACHE STRING "Set to 1 if DRM is found, 0 otherwise" FORCE)
endif()

mark_as_advanced(DRM_FOUND)
Release_v0.3/CMake/FindDRMIntel.cmake000066400000000000000000000016131223142177000174050ustar00rootroot00000000000000
#
# Try to find the DRM Intel library and include path.
# Once done this will define
#
# DRM_INTEL_FOUND        - 1 when intel_bufmgr.h was located, 0 otherwise
# DRM_INTEL_INCLUDE_PATH - directory that contains intel_bufmgr.h
# DRM_INTEL_LIBRARY      - full path of the drm_intel library
#

find_path(DRM_INTEL_INCLUDE_PATH intel_bufmgr.h
  ~/include/libdrm/
  /usr/include/libdrm/
  /usr/local/include/libdrm/
  /sw/include/libdrm/
  /opt/local/include/libdrm/
  DOC "The directory where intel_bufmgr.h resides")

find_library(DRM_INTEL_LIBRARY
  NAMES DRM_INTEL drm_intel
  PATHS
  ~/lib/
  /usr/lib64
  /usr/lib
  /usr/local/lib64
  /usr/local/lib
  /sw/lib
  /opt/local/lib
  /usr/lib/i386-linux-gnu/
  DOC "The DRM_INTEL library")

# Header-only found flag; FORCE keeps the cached value in sync with the
# current lookup result (see set(CACHE) semantics: existing entries are not
# overwritten unless FORCE is given).
if(DRM_INTEL_INCLUDE_PATH)
  include_directories(${DRM_INTEL_INCLUDE_PATH})
  set(DRM_INTEL_FOUND 1 CACHE STRING "Set to 1 if DRM_INTEL is found, 0 otherwise" FORCE)
else()
  set(DRM_INTEL_FOUND 0 CACHE STRING "Set to 1 if DRM_INTEL is found, 0 otherwise" FORCE)
endif()

mark_as_advanced(DRM_INTEL_FOUND)
Release_v0.3/CMake/FindEGL.cmake000066400000000000000000000030211223142177000163710ustar00rootroot00000000000000
#
# Try to find EGL library and include path.
# Once done this will define
#
# EGL_FOUND            - 1 when EGL/egl.h was located, 0 otherwise
# EGL_INCLUDE_PATH     - directory that contains EGL/egl.h
# EGL_LIBRARY          - full path of the EGL library
# MESA_SOURCE_FOUND    - 1 when a mesa source tree was located, 0 otherwise
# MESA_SOURCE_PREFIX   - root of the mesa source tree
# MESA_SOURCE_INCLUDES - include directories inside the mesa source tree
#                        (only set when the tree was found)
#

find_path(EGL_INCLUDE_PATH EGL/egl.h
  ~/include/
  /usr/include/
  /usr/local/include/
  /sw/include/
  /opt/local/include/
  DOC "The directory where EGL/egl.h resides")

find_library(EGL_LIBRARY
  NAMES EGL egl
  PATHS
  ~/lib/
  /usr/lib64
  /usr/lib
  /usr/local/lib64
  /usr/local/lib
  /sw/lib
  /opt/local/lib
  DOC "The EGL library")

# Header-only found flag. FORCE refreshes a value cached by a previous
# configure run; set(... CACHE) alone never overwrites an existing entry.
if(EGL_INCLUDE_PATH)
  include_directories(${EGL_INCLUDE_PATH})
  set(EGL_FOUND 1 CACHE STRING "Set to 1 if EGL is found, 0 otherwise" FORCE)
else()
  set(EGL_FOUND 0 CACHE STRING "Set to 1 if EGL is found, 0 otherwise" FORCE)
endif()

# Find mesa source code. Candidates, in order: $MESA_SOURCE_DIR from the
# environment, a "mesa" directory next to the current source directory, and
# ~/mesa. (The sibling-directory hint previously used the undefined variable
# MAKE_CURRENT_SOURCE_DIR and therefore pointed at /../mesa.)
find_path(MESA_SOURCE_PREFIX src/mesa/main/texobj.c
  $ENV{MESA_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../mesa
  ~/mesa
  DOC "The mesa source directory which is needed for cl_khr_gl_sharing.")

if(MESA_SOURCE_PREFIX)
  set(MESA_SOURCE_INCLUDES
    ${MESA_SOURCE_PREFIX}/src/mesa
    ${MESA_SOURCE_PREFIX}/include
    ${MESA_SOURCE_PREFIX}/src/mapi
    ${MESA_SOURCE_PREFIX}/src/mesa/drivers/dri/i965/
    ${MESA_SOURCE_PREFIX}/src/mesa/drivers/dri/common/)
  set(MESA_SOURCE_FOUND 1 CACHE STRING "Set to 1 if mesa source code is found, 0 otherwise" FORCE)
else()
  set(MESA_SOURCE_FOUND 0 CACHE STRING "Set to 1 if mesa source code is found, 0 otherwise" FORCE)
endif()

mark_as_advanced(EGL_FOUND)
Release_v0.3/CMake/FindGBE.cmake000066400000000000000000000013701223142177000163640ustar00rootroot00000000000000
#
# Try to find the GBE library and include path.
# Once done this will define
#
# GBE_FOUND        - 1 when gen/program.h was located, 0 otherwise
# GBE_INCLUDE_PATH - directory that contains gen/program.h
# GBE_LIBRARY      - full path of the GBE library
#

find_path(GBE_INCLUDE_PATH gen/program.h
  ~/include/
  /usr/include/
  /usr/local/include/
  /sw/include/
  /opt/local/include/
  DOC "The directory where gen/program.h resides")

find_library(GBE_LIBRARY
  NAMES GBE gbe
  PATHS
  ~/lib/
  /usr/lib64
  /usr/lib
  /usr/local/lib64
  /usr/local/lib
  /sw/lib
  /opt/local/lib
  DOC "The GBE library")

# Header-only found flag. FORCE refreshes a value cached by a previous
# configure run; set(... CACHE) alone never overwrites an existing entry.
if(GBE_INCLUDE_PATH)
  include_directories(${GBE_INCLUDE_PATH})
  set(GBE_FOUND 1 CACHE STRING "Set to 1 if GBE is found, 0 otherwise" FORCE)
else()
  set(GBE_FOUND 0 CACHE STRING "Set to 1 if GBE is found, 0 otherwise" FORCE)
endif()

mark_as_advanced(GBE_FOUND)
Release_v0.3/CMake/FindLLVM.cmake000066400000000000000000000066541223142177000165530ustar00rootroot00000000000000
# Find the native LLVM includes and library
#
# LLVM_INCLUDE_DIR - where to find llvm include files
# LLVM_LIBRARY_DIR - where to find llvm libs
# LLVM_CFLAGS - llvm compiler flags
# LLVM_LFLAGS - llvm linker flags
# LLVM_MODULE_LIBS - list of llvm libs for working with modules.
# LLVM_FOUND - True if llvm found.
# Locate llvm-config. When LLVM_INSTALL_DIR is given, only that directory is
# searched (NO_DEFAULT_PATH); otherwise the default program search applies.
if(LLVM_INSTALL_DIR)
  find_program(LLVM_CONFIG_EXECUTABLE
    NAMES llvm-config-32 llvm-config-3.2 llvm-config-31 llvm-config-3.1 llvm-config-3.4 llvm-config
    DOC "llvm-config executable"
    PATHS ${LLVM_INSTALL_DIR}
    NO_DEFAULT_PATH)
else()
  find_program(LLVM_CONFIG_EXECUTABLE
    NAMES llvm-config-32 llvm-config-3.2 llvm-config-31 llvm-config-3.1 llvm-config-3.4 llvm-config
    DOC "llvm-config executable")
endif()

if(LLVM_CONFIG_EXECUTABLE)
  message(STATUS "LLVM llvm-config found at: ${LLVM_CONFIG_EXECUTABLE}")
else()
  message(FATAL_ERROR "Could NOT find LLVM executable, please add -DLLVM_INSTALL_DIR=/path/to/llvm-config/ in cmake command")
endif()

# Reaching this point means llvm-config exists (the branch above aborts
# otherwise), so publish the documented result variable.
set(LLVM_FOUND TRUE)

# Version check, only when find_package() was called with a major.minor
# version. Versions are compared as "<major><minor>" without the dot
# (e.g. 3.1 -> 31), and -DLLVM_<major><minor> is added to the build.
if(LLVM_FIND_VERSION_MAJOR AND LLVM_FIND_VERSION_MINOR)
  set(LLVM_FIND_VERSION_NODOT "${LLVM_FIND_VERSION_MAJOR}${LLVM_FIND_VERSION_MINOR}")
  execute_process(
    COMMAND ${LLVM_CONFIG_EXECUTABLE} --version
    OUTPUT_VARIABLE LLVM_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  # Anchored so that suffixes such as "svn" or a patch level (".1") are
  # dropped completely instead of leaking into the result; the value is
  # quoted so an empty llvm-config output does not break string().
  string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1\\2" LLVM_VERSION_NODOT "${LLVM_VERSION}")
  if(LLVM_VERSION_NODOT VERSION_LESS LLVM_FIND_VERSION_NODOT)
    message(FATAL_ERROR "incompatible LLVM version ${LLVM_VERSION} required ${LLVM_FIND_VERSION}")
  else()
    if(LLVM_VERSION_NODOT VERSION_EQUAL LLVM_FIND_VERSION_NODOT)
      message(STATUS "find stable LLVM version ${LLVM_VERSION}")
    else()
      message(STATUS "find unstable LLVM version ${LLVM_VERSION}")
    endif()
    add_definitions("-DLLVM_${LLVM_VERSION_NODOT}")
  endif()
endif()

# Query llvm-config for the remaining settings.
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --includedir
  OUTPUT_VARIABLE LLVM_INCLUDE_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --libdir
  OUTPUT_VARIABLE LLVM_LIBRARY_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --cppflags
  OUTPUT_VARIABLE LLVM_CFLAGS
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --ldflags
  OUTPUT_VARIABLE LLVM_LFLAGS
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --libs
  OUTPUT_VARIABLE LLVM_MODULE_LIBS
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# add_one_lib(<name>)
# Looks up one clang library and appends its path to CLANG_LIBRARIES in the
# including scope. LLVM_LIBRARY_DIR is tried first and exclusively, so the
# clang libraries match the llvm-config that was selected; the second call is
# a no-op when the first succeeded and otherwise falls back to the default
# search. The temporary cache entry is removed so the next call searches anew.
macro(add_one_lib name)
  find_library(CLANG_LIB
    NAMES ${name}
    PATHS ${LLVM_LIBRARY_DIR}
    NO_DEFAULT_PATH)
  find_library(CLANG_LIB
    NAMES ${name})
  set(CLANG_LIBRARIES ${CLANG_LIBRARIES} ${CLANG_LIB})
  unset(CLANG_LIB CACHE)
endmacro()

#Assume clang lib path same as llvm lib path
add_one_lib("clangFrontend")
add_one_lib("clangSerialization")
add_one_lib("clangDriver")
add_one_lib("clangCodeGen")
add_one_lib("clangSema")
add_one_lib("clangStaticAnalyzerFrontend")
add_one_lib("clangStaticAnalyzerCheckers")
add_one_lib("clangStaticAnalyzerCore")
add_one_lib("clangAnalysis")
add_one_lib("clangEdit")
add_one_lib("clangAST")
add_one_lib("clangParse")
# NOTE(review): clangSema is listed a second time; presumably intentional for
# static link ordering - confirm before removing.
add_one_lib("clangSema")
add_one_lib("clangLex")
add_one_lib("clangBasic")
Release_v0.3/CMake/FindOCLIcd.cmake000066400000000000000000000011401223142177000170170ustar00rootroot00000000000000
#
# Try to find ocl_icd library and include path.
# Once done this will define
#
# OCLIcd_FOUND        - 1 when ocl_icd.h was located, 0 otherwise
# OCLIcd_INCLUDE_PATH - directory that contains ocl_icd.h
#

find_path(OCLIcd_INCLUDE_PATH ocl_icd.h
  ~/include/
  /usr/include/
  /usr/local/include/
  /sw/include/
  /opt/local/include/
  DOC "The directory where ocl_icd.h resides")

# FORCE refreshes a value cached by a previous configure run; set(... CACHE)
# alone never overwrites an existing entry.
if(OCLIcd_INCLUDE_PATH)
  include_directories(${OCLIcd_INCLUDE_PATH})
  set(OCLIcd_FOUND 1 CACHE STRING "Set to 1 if OCLIcd is found, 0 otherwise" FORCE)
else()
  set(OCLIcd_FOUND 0 CACHE STRING "Set to 1 if OCLIcd is found, 0 otherwise" FORCE)
endif()

mark_as_advanced(OCLIcd_FOUND)
Release_v0.3/CMake/FindXext.cmake000066400000000000000000000013241223142177000167160ustar00rootroot00000000000000
#
# Try to find Xext library path.
# Once done this will define
#
# XEXT_FOUND        - 1 when X11/extensions/Xext.h was located, 0 otherwise
# XEXT_INCLUDE_PATH - directory that contains X11/extensions/Xext.h
# XEXT_LIBRARY      - full path of the Xext library
#

find_path(XEXT_INCLUDE_PATH X11/extensions/Xext.h
  /usr/include
  /usr/local/include
  /sw/include
  /opt/local/include
  DOC "The directory where Xext.h resides")

find_library(XEXT_LIBRARY
  NAMES XEXT Xext
  PATHS
  /usr/lib64
  /usr/lib
  /usr/local/lib64
  /usr/local/lib
  /sw/lib
  /opt/local/lib
  DOC "The XEXT library")

# Header-only found flag. FORCE refreshes a value cached by a previous
# configure run; set(... CACHE) alone never overwrites an existing entry.
if(XEXT_INCLUDE_PATH)
  include_directories(${XEXT_INCLUDE_PATH})
  set(XEXT_FOUND 1 CACHE STRING "Set to 1 if XEXT is found, 0 otherwise" FORCE)
else()
  set(XEXT_FOUND 0 CACHE STRING "Set to 1 if XEXT is found, 0 otherwise" FORCE)
endif()

mark_as_advanced(XEXT_FOUND)
Release_v0.3/CMake/FindXfixes.cmake000066400000000000000000000013721223142177000172370ustar00rootroot00000000000000
#
# Try to find Xfixes library path.
# Once done this will define
#
# XFIXES_FOUND        - 1 when X11/extensions/Xfixes.h was located, 0 otherwise
# XFIXES_INCLUDE_PATH - directory that contains X11/extensions/Xfixes.h
# XFIXES_LIBRARY      - full path of the Xfixes library
#

find_path(XFIXES_INCLUDE_PATH X11/extensions/Xfixes.h
  /usr/include
  /usr/local/include
  /sw/include
  /opt/local/include
  DOC "The directory where Xfixes.h resides")

find_library(XFIXES_LIBRARY
  NAMES XFIXES Xfixes
  PATHS
  /usr/lib64
  /usr/lib
  /usr/local/lib64
  /usr/local/lib
  /sw/lib
  /opt/local/lib
  DOC "The XFIXES library")

# Header-only found flag; FORCE keeps the cached value in sync with the
# current lookup result.
if(XFIXES_INCLUDE_PATH)
  include_directories(${XFIXES_INCLUDE_PATH})
  set(XFIXES_FOUND 1 CACHE STRING "Set to 1 if XFIXES is found, 0 otherwise" FORCE)
else()
  set(XFIXES_FOUND 0 CACHE STRING "Set to 1 if XFIXES is found, 0 otherwise" FORCE)
endif()

mark_as_advanced(XFIXES_FOUND)
Release_v0.3/CMakeLists.txt000066400000000000000000000103141223142177000157420ustar00rootroot00000000000000
#############################################################################
# INTEL CORPORATION PROPRIETARY INFORMATION #
# This software is supplied under the terms of a license agreement or #
# nondisclosure agreement with Intel Corporation and may not be copied #
# or disclosed except in accordance with the terms of that agreement. #
# Copyright (C) 2009 Intel Corporation. All Rights Reserved.
#
#############################################################################

cmake_minimum_required(VERSION 2.6.0)
project(OCL)

# Driver and OpenCL C version numbers substituted into OCLConfig.h.
set(LIBCL_DRIVER_VERSION_MAJOR 0)
set(LIBCL_DRIVER_VERSION_MINOR 3)
set(LIBCL_C_VERSION_MAJOR 1)
set(LIBCL_C_VERSION_MINOR 1)
configure_file("src/OCLConfig.h.in" "src/OCLConfig.h")

include_directories(${CMAKE_CURRENT_BINARY_DIR}
                    ${CMAKE_CURRENT_SOURCE_DIR})

set(CMAKE_VERBOSE_MAKEFILE "false")

# Make the Find*.cmake modules shipped in CMake/ visible to find_package().
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake/")

# Emulation switches, all off by default.
set(EMULATE_IVB false CACHE BOOL "To emulate IVB")
set(EMULATE_SNB false CACHE BOOL "To emulate SNB")
set(EMULATE_HSW false CACHE BOOL "To emulate HSW")

# NOTE(review): $(USER) is not CMake syntax; it is passed through to the
# generated build files as-is - presumably expanded by make. Confirm.
add_definitions(-D__$(USER)__)

# Default to an optimized build with debug info when no build type was given,
# then pin the chosen value in the cache.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE RelWithDebInfo)
endif()
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "assure config" FORCE)
message(STATUS "Building mode: ${CMAKE_BUILD_TYPE}")

# Flags for the custom "DebugO0" build type.
set(CMAKE_CXX_FLAGS_DEBUGO0 "-O0 -g")
set(CMAKE_C_FLAGS_DEBUGO0 "-O0 -g")

# Pick the emulated generation (HSW wins over IVB, IVB over SNB); with no
# emulation requested EMULATE_GEN is 0 and the simulator is not used.
if(EMULATE_HSW)
  set(USE_FULSIM "true")
  add_definitions(-DEMULATE_GEN=75)
elseif(EMULATE_IVB)
  set(USE_FULSIM "true")
  add_definitions(-DEMULATE_GEN=7)
elseif(EMULATE_SNB)
  set(USE_FULSIM "true")
  add_definitions(-DEMULATE_GEN=6)
else()
  set(USE_FULSIM "false")
  add_definitions(-DEMULATE_GEN=0)
endif()

# XXX now hard coded to enable the clamp to border workaround for IVB.
add_definitions(-DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)

# Export the simulator choice to the sources as USE_FULSIM=1/0.
if(USE_FULSIM)
  add_definitions(-DUSE_FULSIM=1)
else()
  add_definitions(-DUSE_FULSIM=0)
endif()

# Project-wide compiler flags; user-supplied flags are appended last.
set(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1 ${CMAKE_C_FLAGS}")

# ocl_report_lookup(<label> <found_var>)
# Prints "Looking for <label> - found" or "... - not found" depending on the
# truth value of the variable whose name is passed as <found_var>.
function(ocl_report_lookup label found_var)
  if(${found_var})
    message(STATUS "Looking for ${label} - found")
  else()
    message(STATUS "Looking for ${label} - not found")
  endif()
endfunction()

# Front end stuff we need (LLVM / Clang)
find_package(LLVM 3.1)

# XLib
find_package(X11)
ocl_report_lookup("XLib" X11_FOUND)

# DRM
find_package(DRM)
ocl_report_lookup("DRM" DRM_FOUND)

# OpenGL
find_package(OpenGL)

# Threads
find_package(Threads)

# DRM Intel
find_package(DRMIntel)
ocl_report_lookup("DRM Intel" DRM_INTEL_FOUND)

# Xext
find_package(Xext)
ocl_report_lookup("Xext" XEXT_FOUND)

# Xfixes
find_package(Xfixes)
ocl_report_lookup("Xfixes" XFIXES_FOUND)

# Gen-backend (compiler)
find_package(GBE)
ocl_report_lookup("Gen-Backend" GBE_FOUND)

# EGL; the same module also looks for the mesa source tree.
find_package(EGL)
ocl_report_lookup("EGL" EGL_FOUND)

if(MESA_SOURCE_FOUND)
  message(STATUS "Looking for mesa source code - found")
else()
  message(STATUS "Looking for mesa source code - not found, cl_khr_gl_sharing will be disabled.")
endif()

# OpenCL ICD header
find_package(OCLIcd)
ocl_report_lookup("OCL ICD header file" OCLIcd_FOUND)

find_package(PythonInterp)

add_subdirectory(include)
add_subdirectory(backend)
add_subdirectory(src)
add_subdirectory(utests)
Release_v0.3/COPYING000066400000000000000000000636421223142177000142510ustar00rootroot00000000000000
 GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price.
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. 
Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. 
For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. 
For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. 
d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. 
However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. 
If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. 
For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. 
You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. 
It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. 
If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. 
You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the library's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. <signature of Ty Coon>, 1 April 1990 Ty Coon, President of Vice That's all there is to it! 
# Build configuration for the GBE compiler backend (libgbe).
#
# Selects a compiler family through the COMPILER cache variable, assembles the
# global C/C++ flag strings for it, and then descends into src/.
#
# Variables read here but not defined in this file (LLVM_CFLAGS, LLVM_LFLAGS,
# GBE_COMPILE_UTESTS_FLAG, VISIBILITY_FLAG, GBE_DEBUG_MODE_FLAG) are presumably
# provided by the parent project; when unset they expand to nothing.

# Must come before project() so that policies are set up before the
# languages/toolchain are initialised.
cmake_minimum_required (VERSION 2.6.0)
project (GBE)

set (LIBGBE_VERSION_MAJOR 0)
set (LIBGBE_VERSION_MINOR 2)

set (GBE_CMAKE_DIR "${GBE_SOURCE_DIR}/cmake")
set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${GBE_CMAKE_DIR}")

##############################################################
# Compilation directives
##############################################################

# Cache entry types are case sensitive: BOOL, not "bool".
set (GBE_DEBUG_MEMORY false CACHE BOOL "Activate the memory debugger")
set (GBE_USE_BLOB false CACHE BOOL "Compile everything from one big file")

##############################################################
# Compiler
##############################################################
if (UNIX)
  # The value is a name, so the entry is a STRING ("INT" is not a cache type).
  set (COMPILER "GCC" CACHE STRING "Compiler to choose on Linux (GCC,ICC,CLANG)")
endif ()

# Default to Release with debug info when the user did not choose a build type,
# then publish the effective value in the cache.
if (NOT CMAKE_BUILD_TYPE)
  set (CMAKE_BUILD_TYPE RelWithDebInfo)
endif ()
set (CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "assure config" FORCE)
message(STATUS "Building mode: " ${CMAKE_BUILD_TYPE})

if (GBE_DEBUG_MEMORY)
  set (GBE_DEBUG_MEMORY_FLAG "-DGBE_DEBUG_MEMORY=1")
else ()
  set (GBE_DEBUG_MEMORY_FLAG "-DGBE_DEBUG_MEMORY=0")
endif ()

# Hide all symbols and allows the symbols declared as visible to be exported.
# CMAKE_C_CXX_FLAGS is a project-local helper holding the flags shared by the
# C and C++ command lines; it is not a variable CMake itself interprets.
set (CMAKE_C_CXX_FLAGS "-fvisibility=hidden ${CMAKE_C_CXX_FLAGS}")

if (COMPILER STREQUAL "GCC")
  set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -funroll-loops -Wstrict-aliasing=2 -fstrict-aliasing -msse2 -msse3 -mssse3 -msse4.1 -fPIC -Wall")
  set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} ${LLVM_CFLAGS}")

  set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -Wno-invalid-offsetof -fno-rtti -std=c++0x")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-E")
  set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-undefined ${LLVM_LFLAGS}")
  set (CMAKE_CXX_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
  set (CMAKE_CXX_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")

  set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}")
  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-E")
  set (CMAKE_C_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
  set (CMAKE_C_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
  set (CMAKE_C_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
elseif (COMPILER STREQUAL "CLANG")
  set (CMAKE_C_COMPILER             "clang")
  set (CMAKE_C_FLAGS                "-Wall -std=c99")
  set (CMAKE_C_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
  set (CMAKE_C_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
  set (CMAKE_C_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")

  set (CMAKE_CXX_COMPILER             "clang++")
  set (CMAKE_CXX_FLAGS "-fstrict-aliasing -msse2 -fPIC -Wall -Wno-format-security -Wno-invalid-offsetof -std=c++0x")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG}")
  set (CMAKE_CXX_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
  set (CMAKE_CXX_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")

  # NOTE(review): hard-coded tool locations; they only work where the LLVM
  # binutils live in /usr/bin.
  set (CMAKE_AR      "/usr/bin/llvm-ar")
  set (CMAKE_LINKER  "/usr/bin/llvm-ld")
  set (CMAKE_NM      "/usr/bin/llvm-nm")
  set (CMAKE_OBJDUMP "/usr/bin/llvm-objdump")
  set (CMAKE_RANLIB  "ranlib")
elseif (COMPILER STREQUAL "ICC")
  set (CMAKE_CXX_COMPILER "icpc")
  set (CMAKE_C_COMPILER   "icc")
  set (CMAKE_CXX_FLAGS "-std=c++0x -wd2928 -Wall -fPIC -fstrict-aliasing -fp-model fast -xSSE2")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MODE_FLAG}")
  set (CMAKE_CXX_FLAGS_DEBUG          "-g -O0 -DGBE_DEBUG=1")
  # These two were previously spelled "CCMAKE_...", so the per-configuration
  # flags were never applied and CMake's defaults were used instead.
  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O2 -DGBE_DEBUG=1")
  set (CMAKE_CXX_FLAGS_RELEASE        "-DNDEBUG -O2 -DGBE_DEBUG=0")
  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DGBE_DEBUG=0")
  set (CMAKE_EXE_LINKER_FLAGS "")
endif ()

# Generated headers (e.g. the configured GBEConfig.h) land in the binary dir.
include_directories (${CMAKE_CURRENT_BINARY_DIR})

##############################################################
# Project source code
##############################################################
add_subdirectory (src)
set (ocl_as_file ${GBE_SOURCE_DIR}/src/ocl_as.h) set (ocl_convert_file ${GBE_SOURCE_DIR}/src/ocl_convert.h) set (ocl_stdlib_tmpl_file ${GBE_SOURCE_DIR}/src/ocl_stdlib.tmpl.h) set (ocl_common_header_file ${GBE_SOURCE_DIR}/src/ocl_common_defines.h) set (ocl_blob_file ${CMAKE_CURRENT_BINARY_DIR}/ocl_stdlib.h) set (ocl_blob_cpp_file ${GBE_SOURCE_DIR}/src/ocl_stdlib_str.cpp) set (ocl_gen_blob_cmd ${GBE_SOURCE_DIR}/src/update_blob_ocl_header.py) set (ocl_gen_vector_cmd ${GBE_SOURCE_DIR}/src/gen_builtin_vector.py) set (string_header "\\\"string\\\"") add_custom_command( OUTPUT ${ocl_blob_cpp_file} COMMAND rm -rf ${ocl_blob_cpp_file} COMMAND echo "\\\#include ${string_header}" >> ${ocl_blob_cpp_file} COMMAND echo "namespace gbe {" >> ${ocl_blob_cpp_file} COMMAND echo "std::string ocl_stdlib_str = " >> ${ocl_blob_cpp_file} # Yeah!!! welcome to back slash hell COMMAND cat ${ocl_blob_file} |sed 's/\\\\/\\\\\\\\/g' | sed 's/\\\"/\\\\\\\"/g' | awk '{ printf \(\"\\"%s\\\\n\\"\\n\", $$0\) }' >> ${ocl_blob_cpp_file} COMMAND echo "\;" >> ${ocl_blob_cpp_file} COMMAND echo "}" >> ${ocl_blob_cpp_file} COMMAND echo "" >> ${ocl_blob_cpp_file} DEPENDS ${ocl_blob_file}) set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "ocl_vector.h;ocl_stdlib.h") add_custom_command( OUTPUT ${ocl_vector_file} COMMAND ${PYTHON_EXECUTABLE} ${ocl_gen_vector_cmd} ${ocl_vector_spec_file} ${ocl_vector_file} DEPENDS ${ocl_gen_vector_cmd} ${ocl_vector_spec_file}) add_custom_command( OUTPUT ${ocl_blob_file} COMMAND ${PYTHON_EXECUTABLE} ${ocl_gen_blob_cmd} ${ocl_stdlib_tmpl_file} ${ocl_blob_file} DEPENDS ${ocl_gen_blob_cmd} ${ocl_stdlib_tmpl_file} ${ocl_common_header_file} ${ocl_vector_file} ${ocl_as_file} ${ocl_convert_file}) set (pch_object ${ocl_blob_file}.pch) # generate pch object if (LLVM_VERSION_NODOT VERSION_GREATER 32) set (clang_cmd -cc1 -x cl -triple spir -ffp-contract=off -emit-pch) else (LLVM_VERSION_NODOT VERSION_GREATER 32) if (LLVM_VERSION_NODOT VERSION_GREATER 31) set (clang_cmd 
-cc1 -x cl -triple nvptx -ffp-contract=off -emit-pch) else (LLVM_VERSION_NODOT VERSION_GREATER 31) set (clang_cmd -cc1 -x cl -triple ptx32 -emit-pch) endif (LLVM_VERSION_NODOT VERSION_GREATER 31) endif (LLVM_VERSION_NODOT VERSION_GREATER 32) set (clang_cmd ${clang_cmd} -fno-builtin -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND) add_custom_command( OUTPUT ${pch_object} COMMAND rm -f ${pch_object} COMMAND clang ${clang_cmd} ${ocl_blob_file} -o ${pch_object} DEPENDS ${ocl_blob_file} ) add_custom_target(pch_object DEPENDS ${pch_object}) if (GBE_USE_BLOB) set (GBE_SRC blob.cpp backend/gen/gen_mesa_disasm.c) else (GBE_USE_BLOB) set (GBE_SRC ocl_stdlib.h ocl_stdlib_str.cpp # this file is auto-generated. sys/vector.hpp sys/hash_map.hpp sys/map.hpp sys/set.hpp sys/intrusive_list.hpp sys/intrusive_list.cpp sys/exception.hpp sys/assert.cpp sys/assert.hpp sys/alloc.cpp sys/alloc.hpp sys/mutex.cpp sys/mutex.hpp sys/platform.cpp sys/platform.hpp sys/cvar.cpp sys/cvar.hpp ir/context.cpp ir/context.hpp ir/profile.cpp ir/profile.hpp ir/type.cpp ir/type.hpp ir/unit.cpp ir/unit.hpp ir/constant.cpp ir/constant.hpp ir/sampler.cpp ir/sampler.hpp ir/image.cpp ir/image.hpp ir/instruction.cpp ir/instruction.hpp ir/liveness.cpp ir/register.cpp ir/register.hpp ir/function.cpp ir/function.hpp ir/value.cpp ir/value.hpp ir/lowering.cpp ir/lowering.hpp backend/context.cpp backend/context.hpp backend/program.cpp backend/program.hpp backend/program.h llvm/llvm_gen_backend.cpp llvm/llvm_passes.cpp llvm/llvm_scalarize.cpp llvm/llvm_to_gen.cpp llvm/llvm_gen_backend.hpp llvm/llvm_gen_ocl_function.hxx llvm/llvm_to_gen.hpp backend/gen/gen_mesa_disasm.c backend/gen_insn_selection.cpp backend/gen_insn_selection.hpp backend/gen_insn_scheduling.cpp backend/gen_insn_scheduling.hpp backend/gen_reg_allocation.cpp backend/gen_reg_allocation.hpp backend/gen_context.cpp backend/gen_context.hpp backend/gen_program.cpp backend/gen_program.hpp backend/gen_program.h backend/gen_defs.hpp backend/gen_encoder.hpp 
// The configured options and settings for LIBGBE.
//
// This is a template: backend/src/CMakeLists.txt runs configure_file() on it
// to produce GBEConfig.h, replacing every @NAME@ with the value of the CMake
// variable NAME.

// Library version numbers (LIBGBE_VERSION_MAJOR / LIBGBE_VERSION_MINOR as set
// in backend/CMakeLists.txt).
#define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@
#define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@
// Despite the "_DIR" suffix this is not a directory: the build sets it to a
// ';'-separated pair of file paths, the pre-compiled header in the build tree
// followed by its installed location (<prefix>/lib/ocl_stdlib.h.pch).
#define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
* * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file context.cpp * \author Benjamin Segovia */ #include "backend/context.hpp" #include "backend/program.hpp" #include "backend/gen_encoder.hpp" #include "ir/unit.hpp" #include "ir/function.hpp" #include "ir/profile.hpp" #include "ir/liveness.hpp" #include "ir/value.hpp" #include "ir/image.hpp" #include "ir/sampler.hpp" #include "sys/cvar.hpp" #include namespace gbe { /*! Structure that keeps track of allocation in the register file. This is * actually needed by Context (and not only by GenContext) because both * simulator and hardware have to deal with constant pushing which uses the * register file * * Since Gen is pretty flexible, we just maintain a free list for the * register file (as a classical allocator) and coalesce blocks when required */ class RegisterFilePartitioner { public: RegisterFilePartitioner(void); ~RegisterFilePartitioner(void); /*! Allocate some memory in the register file. Return 0 if out-of-memory. By * the way, zero is not a valid offset since r0 is always preallocated by * the hardware. Note that we always use the left most block when * allocating, so it makes sense for constant pushing */ int16_t allocate(int16_t size, int16_t alignment, bool bFwd=false); /*! Free the given register file piece */ void deallocate(int16_t offset); /*! Spilt a block into 2 blocks */ void splitBlock(int16_t offset, int16_t subOffset); private: /*! May need to make that run-time in the future */ static const int16_t RegisterFileSize = 4*KB; /*! Double chained list of free spaces */ struct Block { Block(int16_t offset, int16_t size) : prev(NULL), next(NULL), offset(offset), size(size) {} Block *prev, *next; //!< Previous and next free blocks int16_t offset; //!< Where the free block starts int16_t size; //!< Size of the free block }; /*! Try to coalesce two blocks (left and right). 
They must be in that order. * If the colascing was done, the left block is deleted */ void coalesce(Block *left, Block *right); /*! Head and tail of the free list */ Block *head; Block *tail; /*! Handle free list element allocation */ DECL_POOL(Block, blockPool); /*! Track allocated memory blocks */ map allocatedBlocks; /*! Use custom allocators */ GBE_CLASS(RegisterFilePartitioner); }; RegisterFilePartitioner::RegisterFilePartitioner(void) { // r0 is always set by the HW and used at the end by EOT const int16_t offset = GEN_REG_SIZE; const int16_t size = RegisterFileSize - offset; tail = head = this->newBlock(offset, size); } RegisterFilePartitioner::~RegisterFilePartitioner(void) { while (this->head) { Block *next = this->head->next; this->deleteBlock(this->head); this->head = next; } } int16_t RegisterFilePartitioner::allocate(int16_t size, int16_t alignment, bool bFwd) { // Make it simple and just use the first block we find Block *list = bFwd ? head : tail; while (list) { int16_t aligned; int16_t spaceOnLeft; int16_t spaceOnRight; if(bFwd) { aligned = ALIGN(list->offset, alignment); spaceOnLeft = aligned - list->offset; spaceOnRight = list->size - size - spaceOnLeft; // Not enough space in this block if (spaceOnRight < 0) { list = list->next; continue; } } else { int16_t unaligned = list->offset + list->size - size - (alignment-1); if(unaligned < 0) { list = list->prev; continue; } aligned = ALIGN(unaligned, alignment); //alloc from block's tail spaceOnLeft = aligned - list->offset; spaceOnRight = list->size - size - spaceOnLeft; // Not enough space in this block if (spaceOnLeft < 0) { list = list->prev; continue; } } // Cool we can use this block Block *left = list->prev; Block *right = list->next; // If we left a hole on the left, create a new block if (spaceOnLeft) { Block *newBlock = this->newBlock(list->offset, spaceOnLeft); if (left) { left->next = newBlock; newBlock->prev = left; } if (right) { newBlock->next = right; right->prev = newBlock; } left = 
newBlock; } // If we left a hole on the right, create a new block as well if (spaceOnRight) { Block *newBlock = this->newBlock(aligned + size, spaceOnRight); if (left) { left->next = newBlock; newBlock->prev = left; } if (right) { right->prev = newBlock; newBlock->next = right; } right = newBlock; } // Chain both successors and predecessors when the entire block was // allocated if (spaceOnLeft == 0 && spaceOnRight == 0) { if (left) left->next = right; if (right) right->prev = left; } // Update the head of the free blocks if (list == head) { if (left) head = left; else if (right) head = right; else head = NULL; } // Update the tail of the free blocks if (list == tail) { if (right) tail = right; else if (left) tail = left; else tail = NULL; } // Free the block and check the consistency this->deleteBlock(list); if (head && head->next) GBE_ASSERT(head->next->prev == head); if (tail && tail->prev) GBE_ASSERT(tail->prev->next == tail); // Track the allocation to retrieve the size later allocatedBlocks.insert(std::make_pair(aligned, size)); // We have a valid offset now return aligned; } return 0; } void RegisterFilePartitioner::deallocate(int16_t offset) { // Retrieve the size in the allocation map auto it = allocatedBlocks.find(offset); GBE_ASSERT(it != allocatedBlocks.end()); const int16_t size = it->second; // Find the two blocks where to insert the new block Block *list = tail, *next = NULL; while (list != NULL) { if (list->offset < offset) break; next = list; list = list->prev; } // Create the block and insert it Block *newBlock = this->newBlock(offset, size); if (list) { GBE_ASSERT(list->offset + list->size <= offset); list->next = newBlock; newBlock->prev = list; } else this->head = newBlock; // list is NULL means newBlock should be the head. if (next) { GBE_ASSERT(offset + size <= next->offset); next->prev = newBlock; newBlock->next = next; } else this->tail = newBlock; // next is NULL means newBlock should be the tail. 
if (list != NULL || next != NULL) { // Coalesce the blocks if possible this->coalesce(list, newBlock); this->coalesce(newBlock, next); } // Do not track this allocation anymore allocatedBlocks.erase(it); } void RegisterFilePartitioner::coalesce(Block *left, Block *right) { if (left == NULL || right == NULL) return; GBE_ASSERT(left->offset < right->offset); GBE_ASSERT(left->next == right); GBE_ASSERT(right->prev == left); if (left->offset + left->size == right->offset) { right->offset = left->offset; right->size += left->size; if (left->prev) left->prev->next = right; right->prev = left->prev; if (left == this->head) this->head = right; this->deleteBlock(left); } } void RegisterFilePartitioner::splitBlock(int16_t offset, int16_t subOffset) { // Retrieve the size in the allocation map auto it = allocatedBlocks.find(offset); GBE_ASSERT(it != allocatedBlocks.end()); while(subOffset > it->second) { subOffset -= it->second; offset += it->second; it = allocatedBlocks.find(offset); GBE_ASSERT(it != allocatedBlocks.end()); } if(subOffset == 0) return; int16_t size = it->second; allocatedBlocks.erase(it); // Track the allocation to retrieve the size later allocatedBlocks.insert(std::make_pair(offset, subOffset)); allocatedBlocks.insert(std::make_pair(offset + subOffset, size - subOffset)); } static int alignScratchSize(int size){ int i = 0; for(; i < size; i+=1024) ; return i; } /////////////////////////////////////////////////////////////////////////// // Generic Context (shared by the simulator and the HW context) /////////////////////////////////////////////////////////////////////////// IVAR(OCL_SIMD_WIDTH, 8, 15, 16); Context::Context(const ir::Unit &unit, const std::string &name) : unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL) { GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS); this->liveness = GBE_NEW(ir::Liveness, const_cast(fn)); this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness); this->partitioner = 
GBE_NEW_NO_ARG(RegisterFilePartitioner); if (fn.getSimdWidth() == 0 || OCL_SIMD_WIDTH != 15) this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH); else this->simdWidth = fn.getSimdWidth(); this->scratchOffset = 0; } Context::~Context(void) { GBE_SAFE_DELETE(this->partitioner); GBE_SAFE_DELETE(this->dag); GBE_SAFE_DELETE(this->liveness); } Kernel *Context::compileKernel(void) { this->kernel = this->allocateKernel(); this->kernel->simdWidth = this->simdWidth; this->buildPatchList(); this->buildArgList(); this->buildUsedLabels(); this->buildJIPs(); this->buildStack(); this->handleSLM(); if (this->emitCode() == false) { GBE_DELETE(this->kernel); this->kernel = NULL; } if(this->kernel != NULL) { this->kernel->scratchSize = alignScratchSize(this->scratchOffset); this->kernel->ctx = this; } return this->kernel; } int16_t Context::allocate(int16_t size, int16_t alignment) { return partitioner->allocate(size, alignment); } void Context::deallocate(int16_t offset) { partitioner->deallocate(offset); } void Context::splitBlock(int16_t offset, int16_t subOffset) { partitioner->splitBlock(offset, subOffset); } int32_t Context::allocConstBuf(uint32_t argID) { GBE_ASSERT(kernel->args[argID].type == GBE_ARG_CONSTANT_PTR); //free previous int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER); if(offset >= 0) deallocate(offset+GEN_REG_SIZE); if(kernel->args[argID].bufSize > 0) { //use 32 alignment here as GEN_REG_SIZE, need dynamic by type? 
newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_CONSTANT_BUFFER+argID, kernel->args[argID].bufSize, 32); } std::sort(kernel->patches.begin(), kernel->patches.end()); offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER); GBE_ASSERT(offset>=0); kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); return offset + GEN_REG_SIZE; } uint32_t Context::allocateScratchMem(uint32_t size) { uint32_t offset = scratchOffset; scratchOffset += size; return offset; } void Context::buildStack(void) { const auto &stackUse = dag->getUse(ir::ocl::stackptr); if (stackUse.size() == 0) // no stack is used if stackptr is unused return; // Be sure that the stack pointer is set GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0); this->kernel->stackSize = 1*KB; // XXX compute that in a better way } uint32_t Context::newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment) { alignment = alignment == 0 ? size : alignment; const uint32_t offset = partitioner->allocate(size, alignment, 1); GBE_ASSERT(offset >= GEN_REG_SIZE); kernel->patches.push_back(PatchInfo(value, subValue, offset - GEN_REG_SIZE)); kernel->curbeSize = std::max(kernel->curbeSize, offset + size - GEN_REG_SIZE); return offset; } uint32_t Context::getImageInfoCurbeOffset(ir::ImageInfoKey key, size_t size) { int32_t offset = fn.getImageSet()->getInfoOffset(key); if (offset >= 0) return offset; newCurbeEntry(GBE_CURBE_IMAGE_INFO, key.data, size, 4); std::sort(kernel->patches.begin(), kernel->patches.end()); offset = kernel->getCurbeOffset(GBE_CURBE_IMAGE_INFO, key.data); GBE_ASSERT(offset >= 0); // XXX do we need to spill it out to bo? fn.getImageSet()->appendInfo(key, offset); return offset + GEN_REG_SIZE; } void Context::insertCurbeReg(ir::Register reg, uint32_t offset) { curbeRegs.insert(std::make_pair(reg, offset)); } void Context::buildPatchList(void) { const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 
4u : 8u; kernel->curbeSize = 0u; // We insert the block IP mask first this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t))); // Go over the arguments and find the related patch locations const uint32_t argNum = fn.argNum(); for (uint32_t argID = 0u; argID < argNum; ++argID) { const ir::FunctionArgument &arg = fn.getArg(argID); // For pointers and values, we have nothing to do. We just push the values if (arg.type == ir::FunctionArgument::GLOBAL_POINTER || arg.type == ir::FunctionArgument::LOCAL_POINTER || arg.type == ir::FunctionArgument::CONSTANT_POINTER || arg.type == ir::FunctionArgument::VALUE || arg.type == ir::FunctionArgument::STRUCTURE || arg.type == ir::FunctionArgument::IMAGE || arg.type == ir::FunctionArgument::SAMPLER) this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize)); } // Already inserted registers go here const size_t localIDSize = sizeof(uint32_t) * this->simdWidth; insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize)); insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize)); insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize)); insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32)); // Go over all the instructions and find the special register we need // to push #define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \ if (reg == ir::ocl::SPECIAL_REG) { \ if (curbeRegs.find(reg) != curbeRegs.end()) continue; \ insertCurbeReg(reg, this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH)); \ } else bool useStackPtr = false; fn.foreachInstruction([&](ir::Instruction &insn) { const uint32_t srcNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register reg = insn.getSrc(srcID); if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { if (srcID != 0) continue; const unsigned char bti 
= fn.getImageSet()->getIdx(insn.getSrc(srcID)); const unsigned char type = ir::cast(insn).getInfoType();; ir::ImageInfoKey key; key.index = bti; key.type = type; const ir::Register imageInfo(key.data | 0x8000); ir::Register realImageInfo; if (curbeRegs.find(imageInfo) == curbeRegs.end()) { uint32_t offset = this->getImageInfoCurbeOffset(key, 4); realImageInfo = insn.getSrc(1); insertCurbeReg(realImageInfo, offset); insertCurbeReg(imageInfo, (uint32_t)realImageInfo); } else realImageInfo = ir::Register(curbeRegs.find(imageInfo)->second); insn.setSrc(srcID, realImageInfo); continue; } else if (insn.getOpcode() == ir::OP_GET_SAMPLER_INFO) { /* change the src to sampler information register. */ if (curbeRegs.find(ir::ocl::samplerinfo) == curbeRegs.end()) insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32)); continue; } if (fn.isSpecialReg(reg) == false) continue; if (curbeRegs.find(reg) != curbeRegs.end()) continue; if (reg == ir::ocl::stackptr) useStackPtr = true; INSERT_REG(lsize0, LOCAL_SIZE_X, 1) INSERT_REG(lsize1, LOCAL_SIZE_Y, 1) INSERT_REG(lsize2, LOCAL_SIZE_Z, 1) INSERT_REG(gsize0, GLOBAL_SIZE_X, 1) INSERT_REG(gsize1, GLOBAL_SIZE_Y, 1) INSERT_REG(gsize2, GLOBAL_SIZE_Z, 1) INSERT_REG(goffset0, GLOBAL_OFFSET_X, 1) INSERT_REG(goffset1, GLOBAL_OFFSET_Y, 1) INSERT_REG(goffset2, GLOBAL_OFFSET_Z, 1) INSERT_REG(workdim, WORK_DIM, 1) INSERT_REG(numgroup0, GROUP_NUM_X, 1) INSERT_REG(numgroup1, GROUP_NUM_Y, 1) INSERT_REG(numgroup2, GROUP_NUM_Z, 1) INSERT_REG(stackptr, STACK_POINTER, this->simdWidth) do {} while(0); } }); #undef INSERT_REG // Insert the number of threads insertCurbeReg(ir::ocl::threadn, this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t))); // Insert the stack buffer if used if (useStackPtr) insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize)); // After this point the vector is immutable. 
Sorting it will make // research faster std::sort(kernel->patches.begin(), kernel->patches.end()); kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); } void Context::buildArgList(void) { kernel->argNum = fn.argNum(); if (kernel->argNum) kernel->args = GBE_NEW_ARRAY_NO_ARG(KernelArgument, kernel->argNum); else kernel->args = NULL; for (uint32_t argID = 0; argID < kernel->argNum; ++argID) { const auto &arg = fn.getArg(argID); switch (arg.type) { case ir::FunctionArgument::VALUE: case ir::FunctionArgument::STRUCTURE: kernel->args[argID].type = GBE_ARG_VALUE; kernel->args[argID].size = arg.size; break; case ir::FunctionArgument::GLOBAL_POINTER: kernel->args[argID].type = GBE_ARG_GLOBAL_PTR; kernel->args[argID].size = sizeof(void*); break; case ir::FunctionArgument::CONSTANT_POINTER: kernel->args[argID].type = GBE_ARG_CONSTANT_PTR; kernel->args[argID].size = sizeof(void*); break; case ir::FunctionArgument::LOCAL_POINTER: kernel->args[argID].type = GBE_ARG_LOCAL_PTR; kernel->args[argID].size = 0; break; case ir::FunctionArgument::IMAGE: kernel->args[argID].type = GBE_ARG_IMAGE; kernel->args[argID].size = sizeof(void*); break; case ir::FunctionArgument::SAMPLER: kernel->args[argID].type = GBE_ARG_SAMPLER; kernel->args[argID].size = sizeof(void*); break; } } } void Context::buildUsedLabels(void) { usedLabels.clear(); fn.foreachInstruction([this](const ir::Instruction &insn) { using namespace ir; if (insn.getOpcode() != OP_BRA) return; const LabelIndex index = cast(insn).getLabelIndex(); usedLabels.insert(index); }); } void Context::buildJIPs(void) { using namespace ir; // Linearly store the branch target for each block and its own label const LabelIndex noTarget(fn.labelNum()); vector> braTargets; int32_t curr = 0, blockNum = fn.blockNum(); braTargets.resize(blockNum); // If some blocks are unused we mark them as such by setting their own label // as "invalid" (== noTarget) for (auto &bb : braTargets) bb = std::make_pair(noTarget, noTarget); 
fn.foreachBlock([&](const BasicBlock &bb) { const LabelIndex ownLabel = bb.getLabelIndex(); const Instruction *last = bb.getLastInstruction(); if (last->getOpcode() != OP_BRA) braTargets[curr++] = std::make_pair(ownLabel, noTarget); else { const BranchInstruction *bra = cast(last); braTargets[curr++] = std::make_pair(ownLabel, bra->getLabelIndex()); } }); // Backward jumps are special. We must insert the label of the next block // when we hit the "DO" i.e. the target label of the backward branch (as in // do { } while) . So, we store the bwd jumps per targets // XXX does not use custom allocator std::multimap bwdTargets; for (int32_t blockID = 0; blockID < blockNum; ++blockID) { const LabelIndex ownLabel = braTargets[blockID].first; const LabelIndex target = braTargets[blockID].second; if (ownLabel == noTarget) continue; // unused block if (target == noTarget) continue; // no branch if (target <= ownLabel) { // This is a backward jump // Last block is just "RET". So, it cannot be the last block GBE_ASSERT(blockID < blockNum - 1); const LabelIndex fallThrough = braTargets[blockID+1].first; bwdTargets.insert(std::make_pair(target, fallThrough)); } } // Stores the current forward targets set fwdTargets; // Now retraverse the blocks and figure out all JIPs for (int32_t blockID = 0; blockID < blockNum; ++blockID) { const LabelIndex ownLabel = braTargets[blockID].first; const LabelIndex target = braTargets[blockID].second; const BasicBlock &bb = fn.getBlock(ownLabel); const Instruction *label = bb.getFirstInstruction(); const Instruction *bra = bb.getLastInstruction(); // Expires the branches that point to us (if any) auto it = fwdTargets.find(ownLabel); if (it != fwdTargets.end()) fwdTargets.erase(it); // Insert the fall through of the bwd branches that point to us if any auto ii = bwdTargets.equal_range(ownLabel); for (auto it = ii.first; it != ii.second; ++it) fwdTargets.insert(it->second); // If there is an outstanding forward branch, compute a JIP for the label auto 
lower = fwdTargets.lower_bound(LabelIndex(0)); GBE_ASSERT(label->isMemberOf() == true); if (lower != fwdTargets.end()) JIPs.insert(std::make_pair(label, *lower)); // Handle special cases and backward branches first if (ownLabel == noTarget) continue; // unused block if (target == noTarget) continue; // no branch at all GBE_ASSERT(bra->isMemberOf() == true); if (target <= ownLabel) { // bwd branch: we always jump JIPs.insert(std::make_pair(bra, LabelIndex(target))); continue; } // This is a forward jump, register it and get the JIP fwdTargets.insert(target); auto jip = fwdTargets.lower_bound(LabelIndex(0)); JIPs.insert(std::make_pair(bra, *jip)); } } void Context::handleSLM(void) { const bool useSLM = fn.getUseSLM(); kernel->useSLM = useSLM; kernel->slmSize = fn.getSLMSize(); } bool Context::isScalarReg(const ir::Register ®) const { GBE_ASSERT(fn.getProfile() == ir::Profile::PROFILE_OCL); if (fn.getArg(reg) != NULL) return true; if (fn.getPushLocation(reg) != NULL) return true; if (reg == ir::ocl::groupid0 || reg == ir::ocl::groupid1 || reg == ir::ocl::groupid2 || reg == ir::ocl::barrierid || reg == ir::ocl::threadn || reg == ir::ocl::numgroup0 || reg == ir::ocl::numgroup1 || reg == ir::ocl::numgroup2 || reg == ir::ocl::lsize0 || reg == ir::ocl::lsize1 || reg == ir::ocl::lsize2 || reg == ir::ocl::gsize0 || reg == ir::ocl::gsize1 || reg == ir::ocl::gsize2 || reg == ir::ocl::goffset0 || reg == ir::ocl::goffset1 || reg == ir::ocl::goffset2 || reg == ir::ocl::workdim) return true; return false; } } /* namespace gbe */ Release_v0.3/backend/src/backend/context.hpp000066400000000000000000000140701223142177000211470ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __GBE_CONTEXT_HPP__ #define __GBE_CONTEXT_HPP__ #include "ir/instruction.hpp" #include "backend/program.h" #include "sys/set.hpp" #include "sys/map.hpp" #include "sys/platform.hpp" #include namespace gbe { namespace ir { class Unit; // Contains the complete program class Function; // We compile a function into a kernel class Liveness; // Describes liveness of each ir function register class FunctionDAG; // Describes the instruction dependencies } /* namespace ir */ } /* namespace gbe */ namespace gbe { class Kernel; // context creates Kernel class RegisterFilePartitioner; // Partition register file for reg allocation /*! Context is the helper structure to build the Gen ISA or simulation code * from GenIR */ class Context : public NonCopyable { public: /*! Create a new context. name is the name of the function we want to * compile */ Context(const ir::Unit &unit, const std::string &name); /*! Release everything needed */ virtual ~Context(void); /*! Compile the code */ Kernel *compileKernel(void); /*! Tells if the labels is used */ INLINE bool isLabelUsed(ir::LabelIndex index) const { return usedLabels.contains(index); } /*! Get the function graph */ INLINE const ir::FunctionDAG &getFunctionDAG(void) const { return *dag; } /*! Get the liveness information */ INLINE const ir::Liveness &getLiveness(void) const { return *liveness; } /*! Tells if the register is used */ bool isRegUsed(const ir::Register ®) const; /*! Indicate if a register is scalar or not */ bool isScalarReg(const ir::Register ®) const; /*! 
Get the kernel we are currently compiling */ INLINE Kernel *getKernel(void) const { return this->kernel; } /*! Get the function we are currently compiling */ INLINE const ir::Function &getFunction(void) const { return this->fn; } /*! Get the target label index for the given instruction */ INLINE ir::LabelIndex getLabelIndex(const ir::Instruction *insn) const { GBE_ASSERT(JIPs.find(insn) != JIPs.end()); return JIPs.find(insn)->second; } /*! Only GOTO and some LABEL instructions may have JIPs */ INLINE bool hasJIP(const ir::Instruction *insn) const { return JIPs.find(insn) != JIPs.end(); } /*! Allocate some memory in the register file */ int16_t allocate(int16_t size, int16_t alignment); /*! Deallocate previously allocated memory */ void deallocate(int16_t offset); /*! Spilt a block into 2 blocks, for some registers allocate together but deallocate seperate */ void splitBlock(int16_t offset, int16_t subOffset); /* allocate curbe for constant ptr argument */ int32_t allocConstBuf(uint32_t argID); /* allocate a new entry for a specific image's information */ /*! Get (search or allocate if fail to find one) image info curbeOffset.*/ uint32_t getImageInfoCurbeOffset(ir::ImageInfoKey key, size_t size); /*! allocate size scratch memory and return start address */ uint32_t allocateScratchMem(uint32_t size); /*! Preallocated curbe register set including special registers. */ map curbeRegs; protected: /*! Build the instruction stream. Return false if failed */ virtual bool emitCode(void) = 0; /*! Allocate a new empty kernel (to be implemented) */ virtual Kernel *allocateKernel(void) = 0; /*! Look if a stack is needed and allocate it */ void buildStack(void); /*! Build the curbe patch list for the given kernel */ void buildPatchList(void); /*! Build the list of arguments to set to launch the kernel */ void buildArgList(void); /*! Build the sets of used labels */ void buildUsedLabels(void); /*! Build JIPs for each branch and possibly labels. 
Can be different from * the branch target due to unstructured branches */ void buildJIPs(void); /*! Configure SLM use if needed */ void handleSLM(void); /*! Insert a new entry with the given size in the Curbe. Return the offset * of the entry */ void insertCurbeReg(ir::Register, uint32_t grfOffset); uint32_t newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0); /*! Provide for each branch and label the label index target */ typedef map JIPMap; const ir::Unit &unit; //!< Unit that contains the kernel const ir::Function &fn; //!< Function to compile std::string name; //!< Name of the kernel to compile Kernel *kernel; //!< Kernel we are building ir::Liveness *liveness; //!< Liveness info for the variables ir::FunctionDAG *dag; //!< Graph of values on the function RegisterFilePartitioner *partitioner; //!< Handle register file partionning set usedLabels; //!< Set of all used labels JIPMap JIPs; //!< Where to jump all labels/branches uint32_t simdWidth; //!< Number of lanes per HW threads uint32_t scratchOffset; //!< scratch slot for next scratch memory request GBE_CLASS(Context); //!< Use custom allocators }; } /* namespace gbe */ #endif /* __GBE_CONTEXT_HPP__ */ Release_v0.3/backend/src/backend/gen/000077500000000000000000000000001223142177000175215ustar00rootroot00000000000000Release_v0.3/backend/src/backend/gen/gen_mesa_disasm.c000066400000000000000000001113301223142177000230020ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ /* * Copyright 2008 Keith Packard * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include "backend/gen_defs.hpp" static const struct { const char *name; int nsrc; int ndst; } opcode[128] = { [GEN_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 }, [GEN_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst 
= 1 }, [GEN_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [GEN_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 }, [GEN_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, [GEN_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [GEN_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, [GEN_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [GEN_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, [GEN_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [GEN_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, [GEN_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, [GEN_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, [GEN_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, }; static const char *conditional_modifier[16] = { [GEN_CONDITIONAL_NONE] = "", [GEN_CONDITIONAL_Z] = ".e", [GEN_CONDITIONAL_NZ] = ".ne", [GEN_CONDITIONAL_G] = ".g", [GEN_CONDITIONAL_GE] = ".ge", [GEN_CONDITIONAL_L] = ".l", [GEN_CONDITIONAL_LE] = ".le", [GEN_CONDITIONAL_R] = ".r", [GEN_CONDITIONAL_O] = ".o", [GEN_CONDITIONAL_U] = ".u", }; static const char *negate[2] = { [0] = "", [1] = "-", }; static const char *_abs[2] = { [0] = "", [1] = "(abs)", }; static const char *vert_stride[16] = { [0] = "0", 
[1] = "1", [2] = "2", [3] = "4", [4] = "8", [5] = "16", [6] = "32", [15] = "VxH", }; static const char *width[8] = { [0] = "1", [1] = "2", [2] = "4", [3] = "8", [4] = "16", }; static const char *horiz_stride[4] = { [0] = "0", [1] = "1", [2] = "2", [3] = "4" }; static const char *chan_sel[4] = { [0] = "x", [1] = "y", [2] = "z", [3] = "w", }; static const char *debug_ctrl[2] = { [0] = "", [1] = ".breakpoint" }; static const char *saturate[2] = { [0] = "", [1] = ".sat" }; static const char *accwr[2] = { [0] = "", [1] = "AccWrEnable" }; static const char *wectrl[2] = { [0] = "WE_normal", [1] = "WE_all" }; static const char *exec_size[8] = { [0] = "1", [1] = "2", [2] = "4", [3] = "8", [4] = "16", [5] = "32" }; static const char *pred_inv[2] = { [0] = "+", [1] = "-" }; static const char *pred_ctrl_align16[16] = { [1] = "", [2] = ".x", [3] = ".y", [4] = ".z", [5] = ".w", [6] = ".any4h", [7] = ".all4h", }; static const char *pred_ctrl_align1[16] = { [1] = "", [2] = ".anyv", [3] = ".allv", [4] = ".any2h", [5] = ".all2h", [6] = ".any4h", [7] = ".all4h", [8] = ".any8h", [9] = ".all8h", [10] = ".any16h", [11] = ".all16h", }; static const char *thread_ctrl[4] = { [0] = "", [2] = "switch" }; static const char *dep_ctrl[4] = { [0] = "", [1] = "NoDDClr", [2] = "NoDDChk", [3] = "NoDDClr,NoDDChk", }; static const char *mask_ctrl[4] = { [0] = "", [1] = "nomask", }; static const char *access_mode[2] = { [0] = "align1", [1] = "align16", }; static const char *reg_encoding[8] = { [0] = "UD", [1] = "D", [2] = "UW", [3] = "W", [4] = "UB", [5] = "B", [6] = "DF", [7] = "F" }; int reg_type_size[8] = { [0] = 4, [1] = 4, [2] = 2, [3] = 2, [4] = 1, [5] = 1, [6] = 8, [7] = 4 }; static const char *reg_file[4] = { [0] = "A", [1] = "g", [2] = "m", [3] = "imm", }; static const char *writemask[16] = { [0x0] = ".", [0x1] = ".x", [0x2] = ".y", [0x3] = ".xy", [0x4] = ".z", [0x5] = ".xz", [0x6] = ".yz", [0x7] = ".xyz", [0x8] = ".w", [0x9] = ".xw", [0xa] = ".yw", [0xb] = ".xyw", [0xc] = ".zw", [0xd] = 
".xzw", [0xe] = ".yzw", [0xf] = "", }; static const char *end_of_thread[2] = { [0] = "", [1] = "EOT" }; static const char *target_function_gen6[16] = { [GEN_SFID_NULL] = "null", [GEN_SFID_MATH] = "math", [GEN_SFID_SAMPLER] = "sampler", [GEN_SFID_MESSAGE_GATEWAY] = "gateway", [GEN_SFID_URB] = "urb", [GEN_SFID_THREAD_SPAWNER] = "thread_spawner", [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", [GEN_SFID_DATAPORT_DATA_CACHE] = "data" }; static const char *gateway_sub_function[8] = { [0] = "open gateway", [1] = "close gateway", [2] = "forward gateway", [3] = "get time stamp", [4] = "barrier", [5] = "update gateway state", [6] = "MMIO R/W", [7] = "reserved" }; static const char *math_function[16] = { [GEN_MATH_FUNCTION_INV] = "inv", [GEN_MATH_FUNCTION_LOG] = "log", [GEN_MATH_FUNCTION_EXP] = "exp", [GEN_MATH_FUNCTION_SQRT] = "sqrt", [GEN_MATH_FUNCTION_RSQ] = "rsq", [GEN_MATH_FUNCTION_SIN] = "sin", [GEN_MATH_FUNCTION_COS] = "cos", [GEN_MATH_FUNCTION_FDIV] = "fdiv", [GEN_MATH_FUNCTION_POW] = "pow", [GEN_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", [GEN_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", [GEN_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", }; static const char *math_saturate[2] = { [0] = "", [1] = "sat" }; static const char *math_signed[2] = { [0] = "", [1] = "signed" }; static const char *math_scalar[2] = { [0] = "", [1] = "scalar" }; static const char *math_precision[2] = { [0] = "", [1] = "partial_precision" }; static const char *data_port_data_cache_simd_mode[] = { "SIMD4x2", "SIMD16", "SIMD8", }; static const char *data_port_data_cache_category[] = { "legacy", "scratch", }; static const char *data_port_scratch_block_size[] = { "1 register", "2 registers", "Reserve", "4 registers", }; static const char *data_port_scratch_invalidate[] = { "no invalidate", "invalidate cache line", }; static const char *data_port_scratch_channel_mode[] = { "Oword", "Dword", }; 
static const char *data_port_scratch_msg_type[] = { "Scratch Read", "Scratch Write", }; static const char *data_port_data_cache_msg_type[] = { [0] = "OWord Block Read", [1] = "Unaligned OWord Block Read", [2] = "OWord Dual Block Read", [3] = "DWord Scattered Read", [4] = "Byte Scattered Read", [5] = "Untyped Surface Read", [6] = "Untyped Atomic Operation", [7] = "Memory Fence", [8] = "OWord Block Write", [10] = "OWord Dual Block Write", [11] = "DWord Scattered Write", [12] = "Byte Scattered Write", [13] = "Untyped Surface Write", }; static int column; static int string (FILE *file, const char *string) { fputs (string, file); column += strlen (string); return 0; } static int format (FILE *f, const char *format, ...) { char buf[1024]; va_list args; va_start (args, format); vsnprintf (buf, sizeof (buf) - 1, format, args); va_end (args); string (f, buf); return 0; } static int newline (FILE *f) { putc ('\n', f); column = 0; return 0; } static int pad (FILE *f, int c) { do string (f, " "); while (column < c); return 0; } static int flag_reg (FILE *file, const int flag_nr, const int flag_sub_reg_nr) { if (flag_nr || flag_sub_reg_nr) return format (file, ".f%d.%d", flag_nr, flag_sub_reg_nr); return 0; } static int control (FILE *file, const char *name, const char *ctrl[], uint32_t id, int *space) { if (!ctrl[id]) { fprintf (file, "*** invalid %s value %d ", name, id); return 1; } if (ctrl[id][0]) { if (space && *space) string (file, " "); string (file, ctrl[id]); if (space) *space = 1; } return 0; } static int print_opcode (FILE *file, int id) { if (!opcode[id].name) { format (file, "*** invalid opcode value %d ", id); return 1; } string (file, opcode[id].name); return 0; } static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr) { int err = 0; if (_reg_file == GEN_ARCHITECTURE_REGISTER_FILE) { switch (_reg_nr & 0xf0) { case GEN_ARF_NULL: string (file, "null"); return -1; case GEN_ARF_ADDRESS: format (file, "a%d", _reg_nr & 0x0f); break; case GEN_ARF_ACCUMULATOR: 
format (file, "acc%d", _reg_nr & 0x0f); break; case GEN_ARF_FLAG: format (file, "f%d", _reg_nr & 0x0f); break; case GEN_ARF_MASK: format (file, "mask%d", _reg_nr & 0x0f); break; case GEN_ARF_MASK_STACK: format (file, "msd%d", _reg_nr & 0x0f); break; case GEN_ARF_STATE: format (file, "sr%d", _reg_nr & 0x0f); break; case GEN_ARF_CONTROL: format (file, "cr%d", _reg_nr & 0x0f); break; case GEN_ARF_NOTIFICATION_COUNT: format (file, "n%d", _reg_nr & 0x0f); break; case GEN_ARF_IP: string (file, "ip"); return -1; break; default: format (file, "ARF%d", _reg_nr); break; } } else { err |= control (file, "src reg file", reg_file, _reg_file, NULL); format (file, "%d", _reg_nr); } return err; } static int dest (FILE *file, const struct GenInstruction *inst) { int err = 0; if (inst->header.access_mode == GEN_ALIGN_1) { if (inst->bits1.da1.dest_address_mode == GEN_ADDRESS_DIRECT) { err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); if (err == -1) return 0; if (inst->bits1.da1.dest_subreg_nr) format (file, ".%d", inst->bits1.da1.dest_subreg_nr / reg_type_size[inst->bits1.da1.dest_reg_type]); format (file, "<%s>", horiz_stride[inst->bits1.da1.dest_horiz_stride]); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); } else { string (file, "g[a0"); if (inst->bits1.ia1.dest_subreg_nr) format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / reg_type_size[inst->bits1.ia1.dest_reg_type]); if (inst->bits1.ia1.dest_indirect_offset) format (file, " %d", inst->bits1.ia1.dest_indirect_offset); string (file, "]"); format (file, "<%s>", horiz_stride[inst->bits1.ia1.dest_horiz_stride]); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); } } else { if (inst->bits1.da16.dest_address_mode == GEN_ADDRESS_DIRECT) { err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); if (err == -1) return 0; if (inst->bits1.da16.dest_subreg_nr) format (file, ".%d", 
inst->bits1.da16.dest_subreg_nr / reg_type_size[inst->bits1.da16.dest_reg_type]); string (file, "<1>"); err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); } else { err = 1; string (file, "Indirect align16 address mode not supported"); } } return 0; } static int dest_3src (FILE *file, const struct GenInstruction *inst) { int err = 0; const uint32_t reg_file = GEN_GENERAL_REGISTER_FILE; err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr); if (err == -1) return 0; if (inst->bits1.da3src.dest_subreg_nr) format (file, ".%d", inst->bits1.da3src.dest_subreg_nr); string (file, "<1>"); err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL); err |= control (file, "dest reg encoding", reg_encoding, GEN_TYPE_F, NULL); return 0; } static int src_align1_region (FILE *file, uint32_t _vert_stride, uint32_t _width, uint32_t _horiz_stride) { int err = 0; string (file, "<"); err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); string (file, ","); err |= control (file, "width", width, _width, NULL); string (file, ","); err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL); string (file, ">"); return err; } static int src_da1 (FILE *file, uint32_t type, uint32_t _reg_file, uint32_t _vert_stride, uint32_t _width, uint32_t _horiz_stride, uint32_t reg_num, uint32_t sub_reg_num, uint32_t __abs, uint32_t _negate) { int err = 0; err |= control (file, "negate", negate, _negate, NULL); err |= control (file, "abs", _abs, __abs, NULL); err |= reg (file, _reg_file, reg_num); if (err == -1) return 0; if (sub_reg_num) format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ src_align1_region (file, _vert_stride, _width, _horiz_stride); err |= control (file, "src reg encoding", reg_encoding, type, NULL); return err; } static int src_ia1 (FILE *file, uint32_t 
type, uint32_t _reg_file,
                    int32_t _addr_imm, uint32_t _addr_subreg_nr,
                    uint32_t _negate, uint32_t __abs, uint32_t _addr_mode,
                    uint32_t _horiz_stride, uint32_t _width,
                    uint32_t _vert_stride)
{
  /* _reg_file and _addr_mode are accepted but not read in this body. */
  int err = 0;
  err |= control (file, "negate", negate, _negate, NULL);
  err |= control (file, "abs", _abs, __abs, NULL);

  string (file, "g[a0");
  if (_addr_subreg_nr)
    format (file, ".%d", _addr_subreg_nr);
  if (_addr_imm)
    format (file, " %d", _addr_imm);
  string (file, "]");
  src_align1_region (file, _vert_stride, _width, _horiz_stride);
  err |= control (file, "src reg encoding", reg_encoding, type, NULL);
  return err;
}

/*
 * Print a directly addressed align16 source operand: negate/abs modifiers,
 * register, optional sub-register, the region "<vert,4,1>", the channel
 * swizzle and the register type.
 *
 * Returns 0 when reg() yields -1, otherwise the OR of the helper results.
 */
static int src_da16 (FILE *file, uint32_t _reg_type, uint32_t _reg_file,
                     uint32_t _vert_stride, uint32_t _reg_nr,
                     uint32_t _subreg_nr, uint32_t __abs, uint32_t _negate,
                     uint32_t swz_x, uint32_t swz_y, uint32_t swz_z,
                     uint32_t swz_w)
{
  int err = 0;
  err |= control (file, "negate", negate, _negate, NULL);
  err |= control (file, "abs", _abs, __abs, NULL);

  err |= reg (file, _reg_file, _reg_nr);
  if (err == -1)
    return 0;
  if (_subreg_nr)
    /* bit4 for subreg number byte addressing. Make this same meaning as
       in da1 case, so output looks consistent. */
    format (file, ".%d", 16 / reg_type_size[_reg_type]);
  string (file, "<");
  err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
  string (file, ",4,1>");
  /*
   * Three kinds of swizzle display:
   *  identity - nothing printed
   *  1->all   - print the single channel
   *  1->1     - print the mapping
   */
  if (swz_x == GEN_CHANNEL_X &&
      swz_y == GEN_CHANNEL_Y &&
      swz_z == GEN_CHANNEL_Z &&
      swz_w == GEN_CHANNEL_W)
  {
    ;
  }
  else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
  {
    string (file, ".");
    err |= control (file, "channel select", chan_sel, swz_x, NULL);
  }
  else
  {
    string (file, ".");
    err |= control (file, "channel select", chan_sel, swz_x, NULL);
    err |= control (file, "channel select", chan_sel, swz_y, NULL);
    err |= control (file, "channel select", chan_sel, swz_z, NULL);
    err |= control (file, "channel select", chan_sel, swz_w, NULL);
  }
  err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
  return err;
}

/*
 * Print source 0 of a three-source instruction.
 *
 * The swizzle byte is split into four 2-bit channel selectors (x in bits
 * 0-1, y in 2-3, z in 4-5, w in 6-7). The region is printed as the fixed
 * text "<4,1,1>" and the type as GEN_TYPE_F, then the swizzle follows.
 */
static int src0_3src (FILE *file, const struct GenInstruction *inst)
{
  int err = 0;
  uint32_t swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
  uint32_t swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3;
  uint32_t swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3;
  uint32_t swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3;

  err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);
  err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);

  err |= reg (file, GEN_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
  if (err == -1)
    return 0;
  if (inst->bits2.da3src.src0_subreg_nr)
    format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
  string (file, "<4,1,1>");
  err |= control (file, "src da16 reg type", reg_encoding,
                  GEN_TYPE_F, NULL);
  /*
   * Three kinds of swizzle display:
   *  identity - nothing printed
   *  1->all   - print the single channel
   *  1->1     - print the mapping
   */
  if (swz_x == GEN_CHANNEL_X &&
      swz_y == GEN_CHANNEL_Y &&
      swz_z == GEN_CHANNEL_Z &&
      swz_w == GEN_CHANNEL_W)
  {
    ;
  }
  else if (swz_x == swz_y &&
swz_x == swz_z && swz_x == swz_w)
  {
    /* All four selectors agree: print that one channel only. */
    string (file, ".");
    err |= control (file, "channel select", chan_sel, swz_x, NULL);
  }
  else
  {
    /* General case: print the full four-channel mapping. */
    string (file, ".");
    err |= control (file, "channel select", chan_sel, swz_x, NULL);
    err |= control (file, "channel select", chan_sel, swz_y, NULL);
    err |= control (file, "channel select", chan_sel, swz_z, NULL);
    err |= control (file, "channel select", chan_sel, swz_w, NULL);
  }
  return err;
}

/*
 * Print source 1 of a three-source instruction.
 *
 * Same layout as source 0, except that the sub-register number is split
 * across two instruction words: the low part lives in bits2 and the high
 * part in bits3 (shifted left by 2 before being merged).
 *
 * Returns 0 when reg() yields -1, otherwise the OR of the helper results.
 */
static int src1_3src (FILE *file, const struct GenInstruction *inst)
{
  int err = 0;
  uint32_t swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
  uint32_t swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3;
  uint32_t swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3;
  uint32_t swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3;
  uint32_t src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
                             (inst->bits3.da3src.src1_subreg_nr_high << 2));

  err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
                  NULL);
  err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);

  err |= reg (file, GEN_GENERAL_REGISTER_FILE,
              inst->bits3.da3src.src1_reg_nr);
  if (err == -1)
    return 0;
  if (src1_subreg_nr)
    format (file, ".%d", src1_subreg_nr);
  string (file, "<4,1,1>");
  err |= control (file, "src da16 reg type", reg_encoding,
                  GEN_TYPE_F, NULL);
  /*
   * Three kinds of swizzle display:
   *  identity - nothing printed
   *  1->all   - print the single channel
   *  1->1     - print the mapping
   */
  if (swz_x == GEN_CHANNEL_X &&
      swz_y == GEN_CHANNEL_Y &&
      swz_z == GEN_CHANNEL_Z &&
      swz_w == GEN_CHANNEL_W)
  {
    ;
  }
  else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
  {
    string (file, ".");
    err |= control (file, "channel select", chan_sel, swz_x, NULL);
  }
  else
  {
    string (file, ".");
    err |= control (file, "channel select", chan_sel, swz_x, NULL);
    err |= control (file, "channel select", chan_sel, swz_y, NULL);
    err |= control (file, "channel select", chan_sel, swz_z, NULL);
    err |= control (file, "channel select", chan_sel, swz_w, NULL);
  }
  return err;
}

/*
 * Print source 2 of a three-source instruction; all of its fields are read
 * from bits3 (modifiers from bits1).
 */
static int src2_3src
(FILE *file, const struct GenInstruction *inst) { int err = 0; uint32_t swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3; uint32_t swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3; uint32_t swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3; uint32_t swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3; err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate, NULL); err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL); err |= reg (file, GEN_GENERAL_REGISTER_FILE, inst->bits3.da3src.src2_reg_nr); if (err == -1) return 0; if (inst->bits3.da3src.src2_subreg_nr) format (file, ".%d", inst->bits3.da3src.src2_subreg_nr); string (file, "<4,1,1>"); err |= control (file, "src da16 reg type", reg_encoding, GEN_TYPE_F, NULL); /* * Three kinds of swizzle display: * identity - nothing printed * 1->all - print the single channel * 1->1 - print the mapping */ if (swz_x == GEN_CHANNEL_X && swz_y == GEN_CHANNEL_Y && swz_z == GEN_CHANNEL_Z && swz_w == GEN_CHANNEL_W) { ; } else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) { string (file, "."); err |= control (file, "channel select", chan_sel, swz_x, NULL); } else { string (file, "."); err |= control (file, "channel select", chan_sel, swz_x, NULL); err |= control (file, "channel select", chan_sel, swz_y, NULL); err |= control (file, "channel select", chan_sel, swz_z, NULL); err |= control (file, "channel select", chan_sel, swz_w, NULL); } return err; } static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) { switch (type) { case GEN_TYPE_UD: format (file, "0x%xUD", inst->bits3.ud); break; case GEN_TYPE_D: format (file, "%dD", inst->bits3.d); break; case GEN_TYPE_UW: format (file, "0x%xUW", (uint16_t) inst->bits3.ud); break; case GEN_TYPE_W: format (file, "%dW", (int16_t) inst->bits3.d); break; case GEN_TYPE_UB: format (file, "0x%xUB", (int8_t) inst->bits3.ud); break; case GEN_TYPE_VF: format (file, "Vector Float"); break; case GEN_TYPE_V: format (file, "0x%xV", 
inst->bits3.ud); break; case GEN_TYPE_F: format (file, "%-gF", inst->bits3.f); } return 0; } static int src0 (FILE *file, const struct GenInstruction *inst) { if (inst->bits1.da1.src0_reg_file == GEN_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src0_reg_type, inst); else if (inst->header.access_mode == GEN_ALIGN_1) { if (inst->bits2.da1.src0_address_mode == GEN_ADDRESS_DIRECT) { return src_da1 (file, inst->bits1.da1.src0_reg_type, inst->bits1.da1.src0_reg_file, inst->bits2.da1.src0_vert_stride, inst->bits2.da1.src0_width, inst->bits2.da1.src0_horiz_stride, inst->bits2.da1.src0_reg_nr, inst->bits2.da1.src0_subreg_nr, inst->bits2.da1.src0_abs, inst->bits2.da1.src0_negate); } else { return src_ia1 (file, inst->bits1.ia1.src0_reg_type, inst->bits1.ia1.src0_reg_file, inst->bits2.ia1.src0_indirect_offset, inst->bits2.ia1.src0_subreg_nr, inst->bits2.ia1.src0_negate, inst->bits2.ia1.src0_abs, inst->bits2.ia1.src0_address_mode, inst->bits2.ia1.src0_horiz_stride, inst->bits2.ia1.src0_width, inst->bits2.ia1.src0_vert_stride); } } else { if (inst->bits2.da16.src0_address_mode == GEN_ADDRESS_DIRECT) { return src_da16 (file, inst->bits1.da16.src0_reg_type, inst->bits1.da16.src0_reg_file, inst->bits2.da16.src0_vert_stride, inst->bits2.da16.src0_reg_nr, inst->bits2.da16.src0_subreg_nr, inst->bits2.da16.src0_abs, inst->bits2.da16.src0_negate, inst->bits2.da16.src0_swz_x, inst->bits2.da16.src0_swz_y, inst->bits2.da16.src0_swz_z, inst->bits2.da16.src0_swz_w); } else { string (file, "Indirect align16 address mode not supported"); return 1; } } } static int src1 (FILE *file, const struct GenInstruction *inst) { if (inst->bits1.da1.src1_reg_file == GEN_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src1_reg_type, inst); else if (inst->header.access_mode == GEN_ALIGN_1) { if (inst->bits3.da1.src1_address_mode == GEN_ADDRESS_DIRECT) { return src_da1 (file, inst->bits1.da1.src1_reg_type, inst->bits1.da1.src1_reg_file, inst->bits3.da1.src1_vert_stride, inst->bits3.da1.src1_width, 
inst->bits3.da1.src1_horiz_stride, inst->bits3.da1.src1_reg_nr, inst->bits3.da1.src1_subreg_nr, inst->bits3.da1.src1_abs, inst->bits3.da1.src1_negate); } else { return src_ia1 (file, inst->bits1.ia1.src1_reg_type, inst->bits1.ia1.src1_reg_file, inst->bits3.ia1.src1_indirect_offset, inst->bits3.ia1.src1_subreg_nr, inst->bits3.ia1.src1_negate, inst->bits3.ia1.src1_abs, inst->bits3.ia1.src1_address_mode, inst->bits3.ia1.src1_horiz_stride, inst->bits3.ia1.src1_width, inst->bits3.ia1.src1_vert_stride); } } else { if (inst->bits3.da16.src1_address_mode == GEN_ADDRESS_DIRECT) { return src_da16 (file, inst->bits1.da16.src1_reg_type, inst->bits1.da16.src1_reg_file, inst->bits3.da16.src1_vert_stride, inst->bits3.da16.src1_reg_nr, inst->bits3.da16.src1_subreg_nr, inst->bits3.da16.src1_abs, inst->bits3.da16.src1_negate, inst->bits3.da16.src1_swz_x, inst->bits3.da16.src1_swz_y, inst->bits3.da16.src1_swz_z, inst->bits3.da16.src1_swz_w); } else { string (file, "Indirect align16 address mode not supported"); return 1; } } } static const int esize[6] = { [0] = 1, [1] = 2, [2] = 4, [3] = 8, [4] = 16, [5] = 32, }; static int qtr_ctrl(FILE *file, const struct GenInstruction *inst) { int qtr_ctl = inst->header.quarter_control; int exec_size = esize[inst->header.execution_size]; if (exec_size == 8) { switch (qtr_ctl) { case 0: string (file, " 1Q"); break; case 1: string (file, " 2Q"); break; case 2: string (file, " 3Q"); break; case 3: string (file, " 4Q"); break; } } else if (exec_size == 16){ if (qtr_ctl < 2) string (file, " 1H"); else string (file, " 2H"); } return 0; } int gen_disasm (FILE *file, const void *opaque_insn) { const struct GenInstruction *inst = (const struct GenInstruction *) opaque_insn; int err = 0; int space = 0; int gen = 7; if (inst->header.predicate_control) { string (file, "("); err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); format (file, "f%d", inst->bits2.da1.flag_reg_nr); if (inst->bits2.da1.flag_sub_reg_nr) format 
(file, ".%d", inst->bits2.da1.flag_sub_reg_nr);
    if (inst->header.access_mode == GEN_ALIGN_1)
      err |= control (file, "predicate control align1", pred_ctrl_align1,
                      inst->header.predicate_control, NULL);
    else
      err |= control (file, "predicate control align16", pred_ctrl_align16,
                      inst->header.predicate_control, NULL);
    string (file, ") ");
  }

  /* Mnemonic followed by the saturate and debug-control modifiers. */
  err |= print_opcode (file, inst->header.opcode);
  err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
  err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);

  /* The destreg_or_condmod field is the math function for MATH, and the
     conditional modifier for everything except SEND/SENDC. */
  if (inst->header.opcode == GEN_OPCODE_MATH) {
    string (file, " ");
    err |= control (file, "function", math_function,
                    inst->header.destreg_or_condmod, NULL);
  } else if (inst->header.opcode != GEN_OPCODE_SEND &&
             inst->header.opcode != GEN_OPCODE_SENDC) {
    err |= control (file, "conditional modifier", conditional_modifier,
                    inst->header.destreg_or_condmod, NULL);
    if (inst->header.destreg_or_condmod)
      err |= flag_reg (file,
                       inst->bits2.da1.flag_reg_nr,
                       inst->bits2.da1.flag_sub_reg_nr);
  }

  /* Execution size in parentheses (not printed for NOP). */
  if (inst->header.opcode != GEN_OPCODE_NOP) {
    string (file, "(");
    err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
    string (file, ")");
  }

  /* gen is set to 7 at the top of the function, so this branch is not taken. */
  if (inst->header.opcode == GEN_OPCODE_SEND && gen < 6)
    format (file, " %d", inst->header.destreg_or_condmod);

  /* Operands, each padded to its own column (16, 32, 48, 64). */
  if (opcode[inst->header.opcode].nsrc == 3) {
    pad (file, 16);
    err |= dest_3src (file, inst);

    pad (file, 32);
    err |= src0_3src (file, inst);

    pad (file, 48);
    err |= src1_3src (file, inst);

    pad (file, 64);
    err |= src2_3src (file, inst);
  } else {
    if (opcode[inst->header.opcode].ndst > 0) {
      pad (file, 16);
      err |= dest (file, inst);
    } else if (gen >= 6 && (inst->header.opcode == GEN_OPCODE_IF ||
                            inst->header.opcode == GEN_OPCODE_ELSE ||
                            inst->header.opcode == GEN_OPCODE_ENDIF ||
                            inst->header.opcode == GEN_OPCODE_WHILE)) {
      /* Structured control flow is not handled here: hits assert(0). */
      // XXX format (file, " %d", inst->bits1.branch_gen6.jump_count);
      assert(0);
    } else if (gen >= 6 && (inst->header.opcode == GEN_OPCODE_BREAK ||
                            inst->header.opcode
== GEN_OPCODE_CONTINUE ||
                            inst->header.opcode == GEN_OPCODE_HALT)) {
      /* Likewise not handled: hits assert(0). */
      // XXX format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
      assert(0);
    } else if (inst->header.opcode == GEN_OPCODE_JMPI) {
      /* JMPI prints its signed immediate from bits3. */
      format (file, " %d", inst->bits3.d);
    }

    if (opcode[inst->header.opcode].nsrc > 0) {
      pad (file, 32);
      err |= src0 (file, inst);
    }
    if (opcode[inst->header.opcode].nsrc > 1) {
      pad (file, 48);
      err |= src1 (file, inst);
    }
  }

  /* SEND/SENDC: print the message descriptor on a second line. For these
     opcodes destreg_or_condmod is used as the message target. */
  if (inst->header.opcode == GEN_OPCODE_SEND ||
      inst->header.opcode == GEN_OPCODE_SENDC) {
    enum GenMessageTarget target = inst->header.destreg_or_condmod;

    newline (file);
    pad (file, 16);
    space = 0;

    err |= control (file, "target function", target_function_gen6,
                    target, &space);

    switch (target) {
      case GEN_SFID_MATH:
        err |= control (file, "math function", math_function,
                        inst->bits3.math_gen5.function, &space);
        err |= control (file, "math saturate", math_saturate,
                        inst->bits3.math_gen5.saturate, &space);
        err |= control (file, "math signed", math_signed,
                        inst->bits3.math_gen5.int_type, &space);
        err |= control (file, "math scalar", math_scalar,
                        inst->bits3.math_gen5.data_type, &space);
        err |= control (file, "math precision", math_precision,
                        inst->bits3.math_gen5.precision, &space);
        break;
      case GEN_SFID_SAMPLER:
        format (file, " (%d, %d, %d, %d)",
                inst->bits3.sampler_gen7.bti,
                inst->bits3.sampler_gen7.sampler,
                inst->bits3.sampler_gen7.msg_type,
                inst->bits3.sampler_gen7.simd_mode);
        break;
      case GEN_SFID_DATAPORT_DATA_CACHE:
        /* category 0 is decoded as an untyped read/write message, any
           other value as a scratch read/write message. */
        if(inst->bits3.gen7_untyped_rw.category == 0) {
          format (file, " (bti: %d, rgba: %d, %s, %s, %s)",
                  inst->bits3.gen7_untyped_rw.bti,
                  inst->bits3.gen7_untyped_rw.rgba,
                  data_port_data_cache_simd_mode[inst->bits3.gen7_untyped_rw.simd_mode],
                  data_port_data_cache_category[inst->bits3.gen7_untyped_rw.category],
                  data_port_data_cache_msg_type[inst->bits3.gen7_untyped_rw.msg_type]);
        } else {
          format (file, " (addr: %d, blocks: %s, %s, mode: %s, %s)",
                  inst->bits3.gen7_scratch_rw.offset,
data_port_scratch_block_size[inst->bits3.gen7_scratch_rw.block_size],
                  data_port_scratch_invalidate[inst->bits3.gen7_scratch_rw.invalidate_after_read],
                  data_port_scratch_channel_mode[inst->bits3.gen7_scratch_rw.channel_mode],
                  data_port_scratch_msg_type[inst->bits3.gen7_scratch_rw.msg_type]);
        }
        break;
      case GEN6_SFID_DATAPORT_CONSTANT_CACHE:
        format (file, " (bti: %d, %s)",
                inst->bits3.gen7_dword_rw.bti,
                data_port_data_cache_msg_type[inst->bits3.gen7_dword_rw.msg_type]);
        break;
      case GEN_SFID_MESSAGE_GATEWAY:
        format (file, " (subfunc: %s, notify: %d, ackreq: %d)",
                gateway_sub_function[inst->bits3.gen7_msg_gw.subfunc],
                inst->bits3.gen7_msg_gw.notify,
                inst->bits3.gen7_msg_gw.ackreq);
        break;

      default:
        format (file, "unsupported target %d", target);
        break;
    }
    if (space)
      string (file, " ");
    format (file, "mlen %d", inst->bits3.generic_gen5.msg_length);
    format (file, " rlen %d", inst->bits3.generic_gen5.response_length);
  }

  /* Instruction options in braces at column 64 (not printed for NOP). */
  pad (file, 64);
  if (inst->header.opcode != GEN_OPCODE_NOP) {
    string (file, "{");
    space = 1;
    err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
    if (gen >= 6)
      err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
    else
      err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
    err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);

    err |= qtr_ctrl (file, inst);
    err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
    if (gen >= 6)
      err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
    if (inst->header.opcode == GEN_OPCODE_SEND ||
        inst->header.opcode == GEN_OPCODE_SENDC)
      err |= control (file, "end of thread", end_of_thread,
                      inst->bits3.generic_gen5.end_of_thread, &space);
    if (space)
      string (file, " ");
    string (file, "}");
  }
  string (file, ";");
  newline (file);
  return err;
}
Release_v0.3/backend/src/backend/gen/gen_mesa_disasm.h000066400000000000000000000023511223142177000230110ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_mesa_disasm.h * \author Benjamin Segovia * * To decode and print one Gen ISA instruction. The code is directly taken * from Mesa */ #ifndef __GBE_GEN_MESA_DISASM_H__ #define __GBE_GEN_MESA_DISASM_H__ #include #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ extern int gen_disasm(FILE *file, const void *opaque_insn); #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* __GBE_GEN_MESA_DISASM_H__ */ Release_v0.3/backend/src/backend/gen_context.cpp000066400000000000000000001727641223142177000220120ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporatin * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_context.cpp * \author Benjamin Segovia */ #include "backend/gen_context.hpp" #include "backend/gen_program.hpp" #include "backend/gen_defs.hpp" #include "backend/gen_encoder.hpp" #include "backend/gen_insn_selection.hpp" #include "backend/gen_insn_scheduling.hpp" #include "backend/gen_reg_allocation.hpp" #include "backend/gen/gen_mesa_disasm.h" #include "ir/function.hpp" #include "sys/cvar.hpp" #include namespace gbe { /////////////////////////////////////////////////////////////////////////// // GenContext implementation /////////////////////////////////////////////////////////////////////////// GenContext::GenContext(const ir::Unit &unit, const std::string &name, bool limitRegisterPressure) : Context(unit, name), limitRegisterPressure(limitRegisterPressure) { this->p = GBE_NEW(GenEncoder, simdWidth, 7); // XXX handle more than Gen7 this->sel = GBE_NEW(Selection, *this); this->ra = GBE_NEW(GenRegAllocator, *this); } GenContext::~GenContext(void) { GBE_DELETE(this->ra); GBE_DELETE(this->sel); GBE_DELETE(this->p); } void GenContext::emitInstructionStream(void) { // Emit Gen ISA for (auto &block : *sel->blockList) for (auto &insn : block.insnList) { const uint32_t opcode = insn.opcode; p->push(); // no more virtual register here in that part of the code generation GBE_ASSERT(insn.state.physicalFlag); p->curr = insn.state; switch (opcode) { #define DECL_SELECTION_IR(OPCODE, FAMILY) \ case SEL_OP_##OPCODE: this->emit##FAMILY(insn); break; #include "backend/gen_insn_selection.hxx" #undef DECL_INSN } p->pop(); } /* per spec, pad the instruction stream with 8 nop to avoid instruction prefetcher prefetch into an invalide page */ for(int i = 0; i < 8; i++) p->NOP(); } void GenContext::patchBranches(void) { using namespace ir; for (auto pair : branchPos2) { const LabelIndex label = pair.first; const int32_t 
insnID = pair.second; const int32_t targetID = labelPos.find(label)->second; p->patchJMPI(insnID, (targetID-insnID-1) * 2); } } void GenContext::clearFlagRegister(void) { // when group size not aligned to simdWidth, flag register need clear to // make prediction(any8/16h) work correctly p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; p->curr.execWidth = 1; p->MOV(GenRegister::retype(GenRegister::flag(0,0), GEN_TYPE_UD), GenRegister::immud(0x0)); p->MOV(GenRegister::retype(GenRegister::flag(1,0), GEN_TYPE_UD), GenRegister::immud(0x0)); p->pop(); } void GenContext::emitStackPointer(void) { using namespace ir; // Only emit stack pointer computation if we use a stack if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0) return; // Check that everything is consistent in the kernel code const uint32_t perLaneSize = kernel->getStackSize(); const uint32_t perThreadSize = perLaneSize * this->simdWidth; const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER); GBE_ASSERT(perLaneSize > 0); GBE_ASSERT(isPowerOf<2>(perLaneSize) == true); GBE_ASSERT(isPowerOf<2>(perThreadSize) == true); // Use shifts rather than muls which are limited to 32x16 bit sources const uint32_t perLaneShift = logi2(perLaneSize); const uint32_t perThreadShift = logi2(perThreadSize); const GenRegister selStatckPtr = this->simdWidth == 8 ? 
GenRegister::ud8grf(ir::ocl::stackptr) :
      GenRegister::ud16grf(ir::ocl::stackptr);
    const GenRegister stackptr = ra->genReg(selStatckPtr);
    // Split the byte offset of the stack buffer pointer into a register
    // number and a dword sub-register index
    const uint32_t nr = offset / GEN_REG_SIZE;
    const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t);
    const GenRegister bufferptr = GenRegister::ud1grf(nr, subnr);

    // We compute the per-lane stack pointer here
    // stackptr = (stackptr << perLaneShift) + bufferptr
    //            + ((r0.5 & 0x1ff) << perThreadShift)
    // r126.0 is used as the scalar scratch register.
    p->push();
      p->curr.execWidth = 1;
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x1ff));
      p->curr.execWidth = this->simdWidth;
      p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift));
      p->curr.execWidth = 1;
      p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(perThreadShift));
      p->curr.execWidth = this->simdWidth;
      p->ADD(stackptr, stackptr, bufferptr);
      p->ADD(stackptr, stackptr, GenRegister::ud1grf(126,0));
    p->pop();
  }

  /*! Record the current size of the encoder store as the position of the
   *  label; no instruction is emitted.
   */
  void GenContext::emitLabelInstruction(const SelectionInstruction &insn) {
    const ir::LabelIndex label(insn.index);
    this->labelPos.insert(std::make_pair(label, p->store.size()));
  }

  /*! Emit a one-source / one-destination instruction. Most opcodes map
   *  directly onto the encoder method of the same name.
   */
  void GenContext::emitUnaryInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src = ra->genReg(insn.src(0));
    switch (insn.opcode) {
      case SEL_OP_MOV: p->MOV(dst, src); break;
      case SEL_OP_FBH: p->FBH(dst, src); break;
      case SEL_OP_FBL: p->FBL(dst, src); break;
      case SEL_OP_NOT: p->NOT(dst, src); break;
      case SEL_OP_RNDD: p->RNDD(dst, src); break;
      case SEL_OP_RNDU: p->RNDU(dst, src); break;
      case SEL_OP_RNDE: p->RNDE(dst, src); break;
      case SEL_OP_RNDZ: p->RNDZ(dst, src); break;
      case SEL_OP_LOAD_INT64_IMM: p->LOAD_INT64_IMM(dst, src.value.i64); break;
      case SEL_OP_CONVI64_TO_I:
       {
        // One MOV per group of four lanes ("nib"): the destination advances
        // by 4 elements and the source (its bottom half) by 8 per step.
        int execWidth = p->curr.execWidth;
        GenRegister xsrc = src.bottom_half(), xdst = dst;
        p->push();
        p->curr.execWidth = 8;
        for(int i = 0; i < execWidth/4; i ++) {
          p->curr.chooseNib(i);
          p->MOV(xdst, xsrc);
          xdst = GenRegister::suboffset(xdst, 4);
          xsrc = GenRegister::suboffset(xsrc, 8);
        }
        p->pop();
break;
       }
      default: NOT_IMPLEMENTED;
    }
  }

  /*! Emit a unary instruction that needs one temporary register; the
   *  temporary is passed as the second destination of the instruction.
   */
  void GenContext::emitUnaryWithTempInstruction(const SelectionInstruction &insn) {
    GenRegister dst = ra->genReg(insn.dst(0));
    GenRegister src = ra->genReg(insn.src(0));
    GenRegister tmp = ra->genReg(insn.dst(1));
    switch (insn.opcode) {
      case SEL_OP_LOAD_DF_IMM:
        p->LOAD_DF_IMM(dst, tmp, src.value.df);
        break;
      case SEL_OP_MOV_DF:
        p->MOV_DF(dst, src, tmp);
        break;
      case SEL_OP_CONVF_TO_I64:
       {
        // Top half: src * 2^-32 rounded toward zero and converted to D.
        // Bottom half: |src| moved into the same temporary viewed as UD.
        tmp.type = GEN_TYPE_F;
        GenRegister d = GenRegister::retype(tmp, GEN_TYPE_D);
        float c = (1.f / 65536.f) * (1.f / 65536.f);
        p->MUL(tmp, src, GenRegister::immf(c));
        p->RNDZ(tmp, tmp);
        p->MOV(d, tmp);
        storeTopHalf(dst, d);
        d.type = GEN_TYPE_UD;
        p->MOV(d, GenRegister::abs(src));
        storeBottomHalf(dst, d);
        break;
       }
      case SEL_OP_CONVI_TO_I64: {
        // B and D sources are first copied into the temporary as D or UD
        // (depending on signedness); other types are used as they are.
        GenRegister middle;
        if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_D) {
          middle = tmp;
          middle.type = src.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD;
          p->MOV(middle, src);
        } else {
          middle = src;
        }
        int execWidth = p->curr.execWidth;
        p->push();
        p->curr.execWidth = 8;
        for (int nib = 0; nib < execWidth / 4; nib ++) {
          p->curr.chooseNib(nib);
          p->MOV(dst.bottom_half(), middle);
          // Top half is the sign (arithmetic shift by 31) for signed
          // sources and zero otherwise.
          if(middle.is_signed_int())
            p->ASR(dst.top_half(), middle, GenRegister::immud(31));
          else
            p->MOV(dst.top_half(), GenRegister::immd(0));
          dst = GenRegister::suboffset(dst, 4);
          middle = GenRegister::suboffset(middle, 4);
        }
        p->pop();
        break;
      }
      default:
        NOT_IMPLEMENTED;
    }
  }

  /*! Emit a two-source instruction that needs one temporary register (the
   *  second destination of the instruction).
   */
  void GenContext::emitBinaryWithTempInstruction(const SelectionInstruction &insn) {
    GenRegister dst = ra->genReg(insn.dst(0));
    GenRegister src0 = ra->genReg(insn.src(0));
    GenRegister src1 = ra->genReg(insn.src(1));
    GenRegister tmp = ra->genReg(insn.dst(1));
    switch (insn.opcode) {
      case SEL_OP_I64ADD: {
        // x and y are two UD views of the temporary, execWidth elements
        // apart. Low halves are added first, then the top halves plus y.
        // NOTE(review): presumably addWithCarry leaves the carry in its
        // last argument -- its definition is outside this chunk.
        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
                    y = GenRegister::suboffset(x, p->curr.execWidth);
        loadBottomHalf(x, src0);
        loadBottomHalf(y, src1);
        addWithCarry(x, x, y);
        storeBottomHalf(dst, x);
        loadTopHalf(x, src0);
        p->ADD(x, x, y);
        loadTopHalf(y, src1);
        p->ADD(x, x, y);
storeTopHalf(dst, x);
        break;
      }
      case SEL_OP_I64SUB: {
        // Same structure as I64ADD, using subWithBorrow for every step.
        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
                    y = GenRegister::suboffset(x, p->curr.execWidth);
        loadBottomHalf(x, src0);
        loadBottomHalf(y, src1);
        subWithBorrow(x, x, y);
        storeBottomHalf(dst, x);
        loadTopHalf(x, src0);
        subWithBorrow(x, x, y);
        loadTopHalf(y, src1);
        subWithBorrow(x, x, y);
        storeTopHalf(dst, x);
        break;
      }
      case SEL_OP_MUL_HI: {
        // Processed 8 lanes at a time: MUL into the accumulator, MACH into
        // tmp (both unpredicated), then a MOV of tmp into dst for the
        // current quarter.
        int w = p->curr.execWidth;
        p->push();
        p->curr.execWidth = 8;
        for (int i = 0; i < w / 8; i ++) {
          p->push();
          p->curr.predicate = GEN_PREDICATE_NONE;
          p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
          p->curr.accWrEnable = 1;
          p->MACH(tmp, src0, src1);
          p->pop();
          p->curr.quarterControl = i;
          p->MOV(dst, tmp);
          dst = GenRegister::Qn(dst, 1);
          src0 = GenRegister::Qn(src0, 1);
          src1 = GenRegister::Qn(src1, 1);
        }
        p->pop();
        break;
       }
     case SEL_OP_HADD: {
        // dst = ((src0 + src1) >> 1) | (acc << 31), 8 lanes at a time.
        int w = p->curr.execWidth;
        p->push();
        p->curr.execWidth = 8;
        for (int i = 0; i < w / 8; i ++) {
          p->curr.quarterControl = i;
          p->ADDC(dst, src0, src1);
          p->SHR(dst, dst, GenRegister::immud(1));
          p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
          p->OR(dst, dst, tmp);
          dst = GenRegister::Qn(dst, 1);
          src0 = GenRegister::Qn(src0, 1);
          src1 = GenRegister::Qn(src1, 1);
        }
        p->pop();
        break;
       }
      case SEL_OP_RHADD: {
        // Same as HADD with 1 added to the sum before the shift.
        int w = p->curr.execWidth;
        p->push();
        p->curr.execWidth = 8;
        for (int i = 0; i < w / 8; i ++) {
          p->curr.quarterControl = i;
          p->ADDC(dst, src0, src1);
          p->ADD(dst, dst, GenRegister::immud(1));
          p->SHR(dst, dst, GenRegister::immud(1));
          p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
          p->OR(dst, dst, tmp);
          dst = GenRegister::Qn(dst, 1);
          src0 = GenRegister::Qn(src0, 1);
          src1 = GenRegister::Qn(src1, 1);
        }
        p->pop();
        break;
       }
      default:
        NOT_IMPLEMENTED;
    }
  }

  /*! Emit a two-source instruction that needs no temporary. 32-bit opcodes
   *  map directly onto the encoder; 64-bit ones are split into operations
   *  on the bottom and top halves, four lanes at a time.
   */
  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src0 = ra->genReg(insn.src(0));
    const GenRegister src1
= ra->genReg(insn.src(1));
    switch (insn.opcode) {
      case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
      case SEL_OP_SEL_INT64:
        {
          // Select bottom and top halves separately, per group of 4 lanes.
          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
          int execWidth = p->curr.execWidth;
          p->push();
          p->curr.execWidth = 8;
          for (int nib = 0; nib < execWidth / 4; nib ++) {
            p->curr.chooseNib(nib);
            p->SEL(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
            p->SEL(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
            xdst = GenRegister::suboffset(xdst, 4);
            xsrc0 = GenRegister::suboffset(xsrc0, 4);
            xsrc1 = GenRegister::suboffset(xsrc1, 4);
          }
          p->pop();
        }
        break;
      case SEL_OP_AND: p->AND(dst, src0, src1); break;
      case SEL_OP_OR: p->OR (dst, src0, src1); break;
      case SEL_OP_XOR: p->XOR(dst, src0, src1); break;
      case SEL_OP_I64AND:
        {
          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
          int execWidth = p->curr.execWidth;
          p->push();
          p->curr.execWidth = 8;
          for (int nib = 0; nib < execWidth / 4; nib ++) {
            p->curr.chooseNib(nib);
            p->AND(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
            p->AND(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
            // The three advances form one comma expression.
            xdst = GenRegister::suboffset(xdst, 4),
            xsrc0 = GenRegister::suboffset(xsrc0, 4),
            xsrc1 = GenRegister::suboffset(xsrc1, 4);
          }
          p->pop();
        }
        break;
      case SEL_OP_I64OR:
        {
          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
          int execWidth = p->curr.execWidth;
          p->push();
          p->curr.execWidth = 8;
          for (int nib = 0; nib < execWidth / 4; nib ++) {
            p->curr.chooseNib(nib);
            p->OR(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
            p->OR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
            xdst = GenRegister::suboffset(xdst, 4),
            xsrc0 = GenRegister::suboffset(xsrc0, 4),
            xsrc1 =
GenRegister::suboffset(xsrc1, 4);
          }
          p->pop();
        }
        break;
      case SEL_OP_I64XOR:
        {
          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
          int execWidth = p->curr.execWidth;
          p->push();
          p->curr.execWidth = 8;
          for (int nib = 0; nib < execWidth / 4; nib ++) {
            p->curr.chooseNib(nib);
            p->XOR(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
            p->XOR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
            xdst = GenRegister::suboffset(xdst, 4),
            xsrc0 = GenRegister::suboffset(xsrc0, 4),
            xsrc1 = GenRegister::suboffset(xsrc1, 4);
          }
          p->pop();
        }
        break;
      case SEL_OP_SHR: p->SHR(dst, src0, src1); break;
      case SEL_OP_SHL: p->SHL(dst, src0, src1); break;
      case SEL_OP_RSR: p->RSR(dst, src0, src1); break;
      case SEL_OP_RSL: p->RSL(dst, src0, src1); break;
      case SEL_OP_ASR: p->ASR(dst, src0, src1); break;
      case SEL_OP_ADD: p->ADD(dst, src0, src1); break;
      case SEL_OP_MUL: p->MUL(dst, src0, src1); break;
      case SEL_OP_MACH: p->MACH(dst, src0, src1); break;
      case SEL_OP_UPSAMPLE_SHORT: p->UPSAMPLE_SHORT(dst, src0, src1); break;
      case SEL_OP_UPSAMPLE_INT: p->UPSAMPLE_INT(dst, src0, src1); break;
      case SEL_OP_UPSAMPLE_LONG:
        {
          // dst.top = src0.bottom, dst.bottom = src1.bottom
          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
          int execWidth = p->curr.execWidth;
          p->push();
          p->curr.execWidth = 8;
          for (int nib = 0; nib < execWidth / 4; nib ++) {
            p->curr.chooseNib(nib);
            p->MOV(xdst.top_half(), xsrc0.bottom_half());
            p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
            xdst = GenRegister::suboffset(xdst, 4);
            xsrc0 = GenRegister::suboffset(xsrc0, 4);
            xsrc1 = GenRegister::suboffset(xsrc1, 4);
          }
          p->pop();
        }
        break;
      default: NOT_IMPLEMENTED;
    }
  }

  /*! Extract a 64-bit shift amount: dest = bottom half of src masked with
   *  63, emitted unpredicated for each group of 4 lanes.
   */
  void GenContext::collectShifter(GenRegister dest, GenRegister src) {
    int execWidth = p->curr.execWidth;
    p->push();
    p->curr.predicate = GEN_PREDICATE_NONE;
    p->curr.execWidth = 8;
    for (int nib = 0; nib < execWidth / 4; nib
++) {
      p->AND(dest, src.bottom_half(), GenRegister::immud(63));
      dest = GenRegister::suboffset(dest, 4);
      src = GenRegister::suboffset(src, 4);
    }
    p->pop();
  }

  /*! (high1:low1) += (high2:low2).
   *  NOTE(review): the final ADD of low2 only makes sense if addWithCarry
   *  writes the carry into its last argument -- confirm in its definition
   *  (outside this chunk).
   */
  void GenContext::I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2) {
    addWithCarry(low1, low1, low2);
    addWithCarry(high1, high1, high2);
    p->ADD(high1, high1, low2);
  }

  /*! Full product of (x_high:x_low) and (y_high:y_low), built from four
   *  I32FullMult partial products and accumulated into dst1..dst4.
   *  The references below alias the parameters; x_low (b) and y_low (d) are
   *  also passed as the first two arguments of later I32FullMult calls.
   *  NOTE(review): presumably those first two arguments are outputs, which
   *  would make b and d scratch after their first use -- confirm.
   */
  void GenContext::I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low) {
    GenRegister &e = dst1, &f = dst2, &g = dst3, &h = dst4,
                &a = x_high, &b = x_low, &c = y_high, &d = y_low;
    I32FullMult(e, h, b, d);
    I32FullMult(f, g, a, d);
    addWithCarry(g, g, e);
    addWithCarry(f, f, e);
    I32FullMult(e, d, b, c);
    I64FullAdd(f, g, e, d);
    I32FullMult(b, d, a, c);
    I64FullAdd(e, f, b, d);
  }

  /*! Negate (high:low) in place: invert both halves, then add 1 to the low
   *  half and add tmp (holding 1 before addWithCarry) to the high half.
   */
  void GenContext::I64Neg(GenRegister high, GenRegister low, GenRegister tmp) {
    p->NOT(high, high);
    p->NOT(low, low);
    p->MOV(tmp, GenRegister::immud(1));
    addWithCarry(low, low, tmp);
    p->ADD(high, high, tmp);
  }

  /*! sign = high >> 31; lanes where sign is non-zero get (high:low)
   *  negated through a predicated I64Neg using the given flag register.
   */
  void GenContext::I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg) {
    p->SHR(sign, high, GenRegister::immud(31));
    p->push();
    p->curr.predicate = GEN_PREDICATE_NONE;
    p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
    p->CMP(GEN_CONDITIONAL_NZ, sign, GenRegister::immud(0));
    p->curr.predicate = GEN_PREDICATE_NORMAL;
    I64Neg(high, low, tmp);
    p->pop();
  }

  /*! Emit the high 64 bits of a 64x64 multiplication.
   *  dst(1)..dst(9) are temporaries (a..i) and dst(10) is the flag register
   *  used for predication.
   */
  void GenContext::emitI64MULHIInstruction(const SelectionInstruction &insn) {
    GenRegister dest = ra->genReg(insn.dst(0));
    GenRegister x = ra->genReg(insn.src(0));
    GenRegister y = ra->genReg(insn.src(1));
    GenRegister a = ra->genReg(insn.dst(1));
    GenRegister b = ra->genReg(insn.dst(2));
    GenRegister c = ra->genReg(insn.dst(3));
    GenRegister d = ra->genReg(insn.dst(4));
    GenRegister e = ra->genReg(insn.dst(5));
    GenRegister f = ra->genReg(insn.dst(6));
    GenRegister g = ra->genReg(insn.dst(7));
    GenRegister h = ra->genReg(insn.dst(8));
    GenRegister i =
ra->genReg(insn.dst(9));
    GenRegister flagReg = ra->genReg(insn.dst(10));
    // (a:b) = x, (c:d) = y
    loadTopHalf(a, x);
    loadBottomHalf(b, x);
    loadTopHalf(c, y);
    loadBottomHalf(d, y);
    if(x.type == GEN_TYPE_UL) {
      I64FullMult(e, f, g, h, a, b, c, d);
    } else {
      // Signed: multiply the absolute values, then negate the whole
      // (e:f:g:h) product in lanes where the operand signs differ.
      I64ABS(e, a, b, i, flagReg);
      I64ABS(f, c, d, i, flagReg);
      p->XOR(i, e, f);
      I64FullMult(e, f, g, h, a, b, c, d);
      p->push();
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
      p->CMP(GEN_CONDITIONAL_NZ, i, GenRegister::immud(0));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->NOT(e, e);
      p->NOT(f, f);
      p->NOT(g, g);
      p->NOT(h, h);
      p->MOV(i, GenRegister::immud(1));
      addWithCarry(h, h, i);
      addWithCarry(g, g, i);
      addWithCarry(f, f, i);
      p->ADD(e, e, i);
      p->pop();
    }
    // The result is the upper two 32-bit words of the product.
    storeTopHalf(dest, e);
    storeBottomHalf(dest, f);
  }

  void GenContext::emitI64MADSATInstruction(const SelectionInstruction &insn) {
    GenRegister dest = ra->genReg(insn.dst(0));
    GenRegister x = ra->genReg(insn.src(0));
    GenRegister y = ra->genReg(insn.src(1));
    GenRegister z = ra->genReg(insn.src(2));
    GenRegister a = ra->genReg(insn.dst(1));
    GenRegister b = ra->genReg(insn.dst(2));
    GenRegister c = ra->genReg(insn.dst(3));
    GenRegister d = ra->genReg(insn.dst(4));
    GenRegister e = ra->genReg(insn.dst(5));
    GenRegister f = ra->genReg(insn.dst(6));
    GenRegister g = ra->genReg(insn.dst(7));
    GenRegister h = ra->genReg(insn.dst(8));
    GenRegister i = ra->genReg(insn.dst(9));
    GenRegister flagReg = ra->genReg(insn.dst(10));
    GenRegister zero = GenRegister::immud(0), one = GenRegister::immud(1);
    loadTopHalf(a, x);
    loadBottomHalf(b, x);
    loadTopHalf(c, y);
    loadBottomHalf(d, y);
    if(x.type == GEN_TYPE_UL) {
      I64FullMult(e, f, g, h, a, b, c, d);
      loadTopHalf(c, z);
      loadBottomHalf(d, z);
      addWithCarry(h, h, d);
      addWithCarry(g, g, d);
      addWithCarry(f, f, d);
      p->ADD(e, e, d);
      addWithCarry(g, g, c);
      addWithCarry(f, f, c);
      p->ADD(e, e, c);
      p->OR(a, e, f);
      p->push();
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
// Continuation of emitI64MADSATInstruction (unsigned path, inside push()).
      // Where a != 0 the result is replaced by all ones in both words.
      p->CMP(GEN_CONDITIONAL_NZ, a, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(g, GenRegister::immd(-1));
      p->MOV(h, GenRegister::immd(-1));
    p->pop();
  } else {
    // Signed path: multiply the absolute values, remember in i whether the
    // signs of x and y differ, and negate the four-word product if so.
    I64ABS(e, a, b, i, flagReg);
    I64ABS(f, c, d, i, flagReg);
    p->XOR(i, e, f);
    I64FullMult(e, f, g, h, a, b, c, d);
    p->push();
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
      p->CMP(GEN_CONDITIONAL_NZ, i, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->NOT(e, e);
      p->NOT(f, f);
      p->NOT(g, g);
      p->NOT(h, h);
      p->MOV(i, one);
      addWithCarry(h, h, i);
      addWithCarry(g, g, i);
      addWithCarry(f, f, i);
      p->ADD(e, e, i);
    p->pop();
    // (c,d) <- z; b and a both receive c >> 31 (arithmetic), i.e. the
    // sign-extension words used when adding z to the four-word product.
    loadTopHalf(c, z);
    loadBottomHalf(d, z);
    p->ASR(GenRegister::retype(b, GEN_TYPE_D), GenRegister::retype(c, GEN_TYPE_D), GenRegister::immd(31));
    p->MOV(a, b);
    // Add d at word h, c at word g, b at word f and a at word e, rippling
    // each addition upwards. a is copied from b beforehand because
    // addWithCarry overwrites its last operand.
    addWithCarry(h, h, d);
    addWithCarry(g, g, d);
    addWithCarry(f, f, d);
    p->ADD(e, e, d);
    addWithCarry(g, g, c);
    addWithCarry(f, f, c);
    p->ADD(e, e, c);
    addWithCarry(f, f, b);
    p->ADD(e, e, b);
    p->ADD(e, e, a);
    // b is now reused as a per-channel "needs clamping" marker.
    p->MOV(b, zero);
    p->push();
      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
      // Upper clamp: mark b when e != 0, f != 0 or g > 0x7FFFFFFF ...
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_NZ, e, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, one);
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_NZ, f, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, one);
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_G, g, GenRegister::immud(0x7FFFFFFF));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, one);
      // ... but clear the mark again where the top bit of e is set
      // (a <- e >> 31).
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->SHR(a, e, GenRegister::immud(31));
      p->CMP(GEN_CONDITIONAL_NZ, a, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, zero);
      // Where still marked, write (g,h) = (0x7FFFFFFF, 0xFFFFFFFF).
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_NZ, b, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(g, GenRegister::immud(0x7FFFFFFF));
      p->MOV(h, GenRegister::immud(0xFFFFFFFFu));
      // Lower clamp: mark b when e or f is not all ones, or when
      // g <= 0x7FFFFFFF ...
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->MOV(b, zero);
      p->CMP(GEN_CONDITIONAL_NEQ, e, GenRegister::immud(0xFFFFFFFFu));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, one);
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_NEQ, f, GenRegister::immud(0xFFFFFFFFu));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, one);
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_LE, g, GenRegister::immud(0x7FFFFFFF));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, one);
      // ... but clear the mark where the top bit of e is clear (a == 0).
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_Z, a, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(b, zero);
      // Where still marked, write (g,h) = (0x80000000, 0).
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_NZ, b, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(g, GenRegister::immud(0x80000000u));
      p->MOV(h, zero);
    p->pop();
  }
  storeTopHalf(dest, g);
  storeBottomHalf(dest, h);
}

// Emits a halving add of two 64-bit sources: the sum of x and y is formed as
// three words (b : c : a, with b collecting the carries out of the top word)
// and then shifted right by one bit into (c, a), which is stored as the
// (top, bottom) halves of dst(0). dst(1)..dst(4) are scratch registers a..d,
// all retyped to UD.
void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) {
  GenRegister dest = ra->genReg(insn.dst(0));
  GenRegister x = ra->genReg(insn.src(0));
  GenRegister y = ra->genReg(insn.src(1));
  GenRegister a = ra->genReg(insn.dst(1));
  GenRegister b = ra->genReg(insn.dst(2));
  GenRegister c = ra->genReg(insn.dst(3));
  GenRegister d = ra->genReg(insn.dst(4));
  a.type = b.type = c.type = d.type = GEN_TYPE_UD;
  loadBottomHalf(a, x);
  loadBottomHalf(b, y);
  loadTopHalf(c, x);
  loadTopHalf(d, y);
  addWithCarry(a, a, b);   // a <- low sum; b is overwritten by addWithCarry
  addWithCarry(c, c, b);   // c += b
  addWithCarry(c, c, d);   // c += top(y); d is overwritten by addWithCarry
  p->ADD(b, b, d);         // b <- sum of the two values left in b and d
  // Shift the three-word value right by one: each word takes bit 0 of the
  // word above it as its new bit 31.
  p->SHR(a, a, GenRegister::immud(1));
  p->SHL(d, c, GenRegister::immud(31));
  p->OR(a, a, d);
  p->SHR(c, c, GenRegister::immud(1));
  p->SHL(d, b, GenRegister::immud(31));
  p->OR(c, c, d);
  storeBottomHalf(dest, a);
  storeTopHalf(dest, c);
}

// Emits a rounding halving add of two 64-bit sources: same scheme as
// emitI64HADDInstruction, except that an immediate 1 is added to the low
// word before the right shift by one.
void GenContext::emitI64RHADDInstruction(const SelectionInstruction &insn) {
  GenRegister dest = ra->genReg(insn.dst(0));
  GenRegister x = ra->genReg(insn.src(0));
  GenRegister y = ra->genReg(insn.src(1));
  GenRegister a = ra->genReg(insn.dst(1));
  GenRegister b = ra->genReg(insn.dst(2));
  GenRegister c = ra->genReg(insn.dst(3));
  GenRegister d = ra->genReg(insn.dst(4));
  a.type = b.type = c.type = d.type = GEN_TYPE_UD;
  loadBottomHalf(a, x);
  loadBottomHalf(b, y);
  addWithCarry(a, a, b);                 // a <- low sum; b is overwritten
  p->MOV(c, GenRegister::immud(1));
  addWithCarry(a, a, c);                 // a += 1; c is overwritten
  p->ADD(b, b, c);                       // merge both overwritten operands
  loadTopHalf(c, x);
  loadTopHalf(d, y);
  addWithCarry(c, c, b);
  addWithCarry(c, c, d);
  p->ADD(b, b, d);
  // Shift the three-word value (b : c : a) right by one bit.
  p->SHR(a, a, GenRegister::immud(1));
  p->SHL(d, c, GenRegister::immud(31));
  p->OR(a, a, d);
  p->SHR(c, c, GenRegister::immud(1));
  p->SHL(d, b, GenRegister::immud(31));
  p->OR(c, c, d);
  storeBottomHalf(dest, a);
  storeTopHalf(dest, c);
}

// Emits a 64-bit shift (SEL_OP_I64SHL, SEL_OP_I64SHR or SEL_OP_I64ASR) of
// src(0) by src(1). dst(1)..dst(6) are scratch registers a..f (retyped to
// UD) and dst(7) is the flag register.
// Each case follows the same pattern:
//   1. a <- shift amount (collectShifter), (f,e) <- (top, bottom) of x;
//   2. compute the bits crossing the word boundary (shift by negate(a)), the
//      two per-word shifts, and their OR;
//   3. pick between the plain and the OR-ed word with a SEL after comparing
//      a against zero;
//   4. isolate bit 5 of the amount (a & 32) and, with a second CMP/SEL pair,
//      pick between the per-word results and the "moved a whole word" results.
// NOTE(review): the exact lane selection performed by the predicated SELs
// depends on encoder/ISA semantics not visible here -- confirm before editing.
void GenContext::emitI64ShiftInstruction(const SelectionInstruction &insn) {
  GenRegister dest = ra->genReg(insn.dst(0));
  GenRegister x = ra->genReg(insn.src(0));
  GenRegister y = ra->genReg(insn.src(1));
  GenRegister a = ra->genReg(insn.dst(1));
  GenRegister b = ra->genReg(insn.dst(2));
  GenRegister c = ra->genReg(insn.dst(3));
  GenRegister d = ra->genReg(insn.dst(4));
  GenRegister e = ra->genReg(insn.dst(5));
  GenRegister f = ra->genReg(insn.dst(6));
  a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
  GenRegister flagReg = ra->genReg(insn.dst(7));
  GenRegister zero = GenRegister::immud(0);
  switch(insn.opcode) {
    case SEL_OP_I64SHL:
      p->push();
        p->curr.predicate = GEN_PREDICATE_NONE;
        collectShifter(a, y);
        loadBottomHalf(e, x);
        loadTopHalf(f, x);
        p->SHR(b, e, GenRegister::negate(a));   // bits moving from low to high
        p->SHL(c, e, a);                        // shifted low word
        p->SHL(d, f, a);                        // shifted high word
        p->OR(e, d, b);                         // high word with carried-in bits
        p->MOV(flagReg, GenRegister::immuw(0xFFFF));
        p->curr.predicate = GEN_PREDICATE_NORMAL;
        p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
        p->CMP(GEN_CONDITIONAL_Z, a, zero);
        p->SEL(d, d, e);
        p->curr.predicate = GEN_PREDICATE_NONE;
        p->AND(a, a, GenRegister::immud(32));
        p->MOV(flagReg, GenRegister::immuw(0xFFFF));
        p->curr.predicate = GEN_PREDICATE_NORMAL;
        p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
        p->CMP(GEN_CONDITIONAL_Z, a, zero);
        p->SEL(d, d, c);
        p->SEL(c, c, zero);
      p->pop();
      storeBottomHalf(dest, c);
      storeTopHalf(dest, d);
      break;
    case SEL_OP_I64SHR:
      p->push();
        p->curr.predicate = GEN_PREDICATE_NONE;
// Continuation of emitI64ShiftInstruction (case SEL_OP_I64SHR, inside push()).
        collectShifter(a, y);
        loadBottomHalf(e, x);
        loadTopHalf(f, x);
        p->SHL(b, f, GenRegister::negate(a));   // bits moving from high to low
        p->SHR(c, f, a);                        // shifted high word (logical)
        p->SHR(d, e, a);                        // shifted low word
        p->OR(e, d, b);                         // low word with carried-in bits
        p->MOV(flagReg, GenRegister::immuw(0xFFFF));
        p->curr.predicate = GEN_PREDICATE_NORMAL;
        p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
        p->CMP(GEN_CONDITIONAL_Z, a, zero);
        p->SEL(d, d, e);
        p->curr.predicate = GEN_PREDICATE_NONE;
        p->AND(a, a, GenRegister::immud(32));
        p->MOV(flagReg, GenRegister::immuw(0xFFFF));
        p->curr.predicate = GEN_PREDICATE_NORMAL;
        p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
        p->CMP(GEN_CONDITIONAL_Z, a, zero);
        p->SEL(d, d, c);
        p->SEL(c, c, zero);
      p->pop();
      storeBottomHalf(dest, d);
      storeTopHalf(dest, c);
      break;
    case SEL_OP_I64ASR:
      // Same as SEL_OP_I64SHR except that the high word is treated as signed
      // (type D), shifted with ASR, and the final SEL pairs c with the
      // immediate -1 instead of zero.
      f.type = GEN_TYPE_D;
      p->push();
        p->curr.predicate = GEN_PREDICATE_NONE;
        collectShifter(a, y);
        loadBottomHalf(e, x);
        loadTopHalf(f, x);
        p->SHL(b, f, GenRegister::negate(a));
        p->ASR(c, f, a);
        p->SHR(d, e, a);
        p->OR(e, d, b);
        p->MOV(flagReg, GenRegister::immuw(0xFFFF));
        p->curr.predicate = GEN_PREDICATE_NORMAL;
        p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
        p->CMP(GEN_CONDITIONAL_Z, a, zero);
        p->SEL(d, d, e);
        p->curr.predicate = GEN_PREDICATE_NONE;
        p->AND(a, a, GenRegister::immud(32));
        p->MOV(flagReg, GenRegister::immuw(0xFFFF));
        p->curr.predicate = GEN_PREDICATE_NORMAL;
        p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
        p->CMP(GEN_CONDITIONAL_Z, a, zero);
        p->SEL(d, d, c);
        p->SEL(c, c, GenRegister::immd(-1));
      p->pop();
      storeBottomHalf(dest, d);
      storeTopHalf(dest, c);
      break;
    default:
      NOT_IMPLEMENTED;
  }
}

// Copies the flag register identified by (flag, subFlag) into dest using a
// single MOV emitted with execution width 1.
void GenContext::saveFlag(GenRegister dest, int flag, int subFlag) {
  p->push();
    p->curr.execWidth = 1;
    p->MOV(dest, GenRegister::flag(flag, subFlag));
  p->pop();
}

// Converts the pair (high, low) to a float in dst as
//   dst = float(high) * (65536.f * 65536.f) + float(low).
// tmp is scratch: it is retyped to F and receives the converted low word.
// NOTE(review): the integer-to-float conversion relies on the MOVs converting
// between the operand types -- confirm against the encoder.
void GenContext::UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp) {
  p->MOV(dst, high);
  p->MUL(dst, dst, GenRegister::immf(65536.f * 65536.f));
  tmp.type = GEN_TYPE_F;
  p->MOV(tmp, low);
  p->ADD(dst, dst, tmp);
}

// Return type of the next definition (emitI64ToFloatInstruction); its
// declarator starts on the following line.
void
GenContext::emitI64ToFloatInstruction(const SelectionInstruction &insn) { GenRegister src = ra->genReg(insn.src(0)); GenRegister dest = ra->genReg(insn.dst(0)); GenRegister high = ra->genReg(insn.dst(1)); GenRegister low = ra->genReg(insn.dst(2)); GenRegister tmp = ra->genReg(insn.dst(3)); GenRegister flagReg = ra->genReg(insn.dst(4)); loadTopHalf(high, src); loadBottomHalf(low, src); if(!src.is_signed_int()) { UnsignedI64ToFloat(dest, high, low, tmp); } else { p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); p->CMP(GEN_CONDITIONAL_GE, high, GenRegister::immud(0x80000000)); p->curr.predicate = GEN_PREDICATE_NORMAL; p->NOT(high, high); p->NOT(low, low); p->MOV(tmp, GenRegister::immud(1)); addWithCarry(low, low, tmp); p->ADD(high, high, tmp); p->pop(); UnsignedI64ToFloat(dest, high, low, tmp); p->push(); p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); dest.type = GEN_TYPE_UD; p->OR(dest, dest, GenRegister::immud(0x80000000)); p->pop(); } } void GenContext::emitI64CompareInstruction(const SelectionInstruction &insn) { GenRegister src0 = ra->genReg(insn.src(0)); GenRegister src1 = ra->genReg(insn.src(1)); GenRegister tmp0 = ra->genReg(insn.dst(0)); GenRegister tmp1 = ra->genReg(insn.dst(1)); GenRegister tmp2 = ra->genReg(insn.dst(2)); tmp0.type = (src0.type == GEN_TYPE_L) ? GEN_TYPE_D : GEN_TYPE_UD; tmp1.type = (src1.type == GEN_TYPE_L) ? 
GEN_TYPE_D : GEN_TYPE_UD; int flag = p->curr.flag, subFlag = p->curr.subFlag; GenRegister f1 = GenRegister::retype(tmp2, GEN_TYPE_UW), f2 = GenRegister::suboffset(f1, 1), f3 = GenRegister::suboffset(f1, 2), f4 = GenRegister::suboffset(f1, 3); p->push(); p->curr.predicate = GEN_PREDICATE_NONE; saveFlag(f4, flag, subFlag); loadTopHalf(tmp0, src0); loadTopHalf(tmp1, src1); switch(insn.extra.function) { case GEN_CONDITIONAL_L: case GEN_CONDITIONAL_LE: case GEN_CONDITIONAL_G: case GEN_CONDITIONAL_GE: { int cmpTopHalf = insn.extra.function; if(insn.extra.function == GEN_CONDITIONAL_LE) cmpTopHalf = GEN_CONDITIONAL_L; if(insn.extra.function == GEN_CONDITIONAL_GE) cmpTopHalf = GEN_CONDITIONAL_G; p->CMP(cmpTopHalf, tmp0, tmp1); } saveFlag(f1, flag, subFlag); p->CMP(GEN_CONDITIONAL_EQ, tmp0, tmp1); saveFlag(f2, flag, subFlag); tmp0.type = tmp1.type = GEN_TYPE_UD; loadBottomHalf(tmp0, src0); loadBottomHalf(tmp1, src1); p->CMP(insn.extra.function, tmp0, tmp1); saveFlag(f3, flag, subFlag); p->push(); p->curr.execWidth = 1; p->AND(f2, f2, f3); p->OR(f1, f1, f2); p->pop(); break; case GEN_CONDITIONAL_EQ: p->CMP(GEN_CONDITIONAL_EQ, tmp0, tmp1); saveFlag(f1, flag, subFlag); tmp0.type = tmp1.type = GEN_TYPE_UD; loadBottomHalf(tmp0, src0); loadBottomHalf(tmp1, src1); p->CMP(GEN_CONDITIONAL_EQ, tmp0, tmp1); saveFlag(f2, flag, subFlag); p->push(); p->curr.execWidth = 1; p->AND(f1, f1, f2); p->pop(); break; case GEN_CONDITIONAL_NEQ: p->CMP(GEN_CONDITIONAL_NEQ, tmp0, tmp1); saveFlag(f1, flag, subFlag); tmp0.type = tmp1.type = GEN_TYPE_UD; loadBottomHalf(tmp0, src0); loadBottomHalf(tmp1, src1); p->CMP(GEN_CONDITIONAL_NEQ, tmp0, tmp1); saveFlag(f2, flag, subFlag); p->push(); p->curr.execWidth = 1; p->OR(f1, f1, f2); p->pop(); break; default: NOT_IMPLEMENTED; } p->curr.execWidth = 1; p->AND(f1, f1, f4); p->MOV(GenRegister::flag(flag, subFlag), f1); p->pop(); } void GenContext::emitI64SATADDInstruction(const SelectionInstruction &insn) { GenRegister x = ra->genReg(insn.src(0)); GenRegister y 
= ra->genReg(insn.src(1)); GenRegister dst = ra->genReg(insn.dst(0)); GenRegister a = ra->genReg(insn.dst(1)); GenRegister b = ra->genReg(insn.dst(2)); GenRegister c = ra->genReg(insn.dst(3)); GenRegister d = ra->genReg(insn.dst(4)); GenRegister e = ra->genReg(insn.dst(5)); GenRegister flagReg = ra->genReg(insn.dst(6)); loadTopHalf(a, x); loadBottomHalf(b, x); loadTopHalf(c, y); loadBottomHalf(d, y); if(dst.is_signed_int()) p->SHR(e, a, GenRegister::immud(31)); addWithCarry(b, b, d); addWithCarry(a, a, d); addWithCarry(a, a, c); p->ADD(c, c, d); p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); if(! dst.is_signed_int()) { p->CMP(GEN_CONDITIONAL_NZ, c, GenRegister::immud(0)); p->curr.predicate = GEN_PREDICATE_NORMAL; p->MOV(a, GenRegister::immud(0xFFFFFFFFu)); p->MOV(b, GenRegister::immud(0xFFFFFFFFu)); } else { p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(1)); p->curr.predicate = GEN_PREDICATE_NORMAL; p->CMP(GEN_CONDITIONAL_L, a, GenRegister::immud(0x80000000u)); p->MOV(a, GenRegister::immud(0x80000000u)); p->MOV(b, GenRegister::immud(0)); p->curr.predicate = GEN_PREDICATE_NONE; p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(0)); p->curr.predicate = GEN_PREDICATE_NORMAL; p->CMP(GEN_CONDITIONAL_GE, a, GenRegister::immud(0x80000000u)); p->MOV(a, GenRegister::immud(0x7FFFFFFFu)); p->MOV(b, GenRegister::immud(0xFFFFFFFFu)); } p->pop(); storeTopHalf(dst, a); storeBottomHalf(dst, b); } void GenContext::emitI64SATSUBInstruction(const SelectionInstruction &insn) { GenRegister x = ra->genReg(insn.src(0)); GenRegister y = ra->genReg(insn.src(1)); GenRegister dst = ra->genReg(insn.dst(0)); GenRegister a = ra->genReg(insn.dst(1)); GenRegister b = ra->genReg(insn.dst(2)); GenRegister c = ra->genReg(insn.dst(3)); GenRegister d = ra->genReg(insn.dst(4)); GenRegister e = ra->genReg(insn.dst(5)); GenRegister flagReg = ra->genReg(insn.dst(6)); loadTopHalf(a, x); loadBottomHalf(b, x); loadTopHalf(c, y); 
// Continuation of emitI64SATSUBInstruction: (a,b) <- x and c <- top(y) have
// already been loaded by the statements preceding this point.
  loadBottomHalf(d, y);
  if(dst.is_signed_int())
    p->SHR(e, a, GenRegister::immud(31));   // e <- top bit of x
  subWithBorrow(b, b, d);   // low difference; d is overwritten by subWithBorrow
  subWithBorrow(a, a, d);   // a -= d
  subWithBorrow(a, a, c);   // a -= top(y); c is overwritten by subWithBorrow
  p->ADD(c, c, d);          // c <- combined values left in c and d
  p->push();
    p->curr.predicate = GEN_PREDICATE_NONE;
    p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
    if(! dst.is_signed_int()) {
      // Unsigned: where c != 0, write zero to both words.
      p->CMP(GEN_CONDITIONAL_NZ, c, GenRegister::immud(0));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(a, GenRegister::immud(0));
      p->MOV(b, GenRegister::immud(0));
    } else {
      // NOTE(review): only the sign of x (e) is inspected here; this appears
      // to assume something about the sign of y at the call site -- confirm.
      // e == 1 and top word of the result < 0x80000000:
      // write (0x80000000, 0).
      p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(1));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->CMP(GEN_CONDITIONAL_L, a, GenRegister::immud(0x80000000u));
      p->MOV(a, GenRegister::immud(0x80000000u));
      p->MOV(b, GenRegister::immud(0));
      // e == 0 and top word of the result >= 0x80000000:
      // write (0x7FFFFFFF, 0xFFFFFFFF).
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(0));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->CMP(GEN_CONDITIONAL_GE, a, GenRegister::immud(0x80000000u));
      p->MOV(a, GenRegister::immud(0x7FFFFFFFu));
      p->MOV(b, GenRegister::immud(0xFFFFFFFFu));
    }
  p->pop();
  storeTopHalf(dst, a);
  storeBottomHalf(dst, b);
}

// Copies src.top_half() into dest with predication disabled, using MOVs
// emitted at execution width 8:
//   dest+0 <- top+0, dest+4 <- top+8,
// and, when the surrounding execution width is 16, also
//   dest+8 <- top+16, dest+12 <- top+24
// (offsets are GenRegister::suboffset values).
void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
  int execWidth = p->curr.execWidth;   // width in effect before the push
  src = src.top_half();
  p->push();
    p->curr.predicate = GEN_PREDICATE_NONE;
    p->curr.execWidth = 8;
    p->MOV(dest, src);
    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
    if (execWidth == 16) {
      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
    }
  p->pop();
}

// Writes src into dest.top_half(); the reverse data movement of loadTopHalf.
// Unlike the load helpers the predicate is not reset here, so the MOVs are
// emitted with whatever predicate state is current. nibControl and
// quarterControl are switched between the MOVs.
// NOTE(review): presumably those controls select which group of channels each
// MOV covers -- confirm against the encoder.
void GenContext::storeTopHalf(GenRegister dest, GenRegister src) {
  int execWidth = p->curr.execWidth;   // width in effect before the push
  dest = dest.top_half();
  p->push();
    p->curr.execWidth = 8;
    p->MOV(dest, src);
    p->curr.nibControl = 1;
    p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
    if (execWidth == 16) {
      p->curr.quarterControl = 1;
      p->curr.nibControl = 0;
      p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
      p->curr.nibControl = 1;
// Continuation of storeTopHalf (inside the execWidth == 16 branch).
      p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
    }
  p->pop();
}

// Copies src.bottom_half() into dest with predication disabled; identical to
// loadTopHalf except for the half that is selected.
void GenContext::loadBottomHalf(GenRegister dest, GenRegister src) {
  int execWidth = p->curr.execWidth;   // width in effect before the push
  src = src.bottom_half();
  p->push();
    p->curr.predicate = GEN_PREDICATE_NONE;
    p->curr.execWidth = 8;
    p->MOV(dest, src);
    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
    if (execWidth == 16) {
      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
    }
  p->pop();
}

// Writes src into dest.bottom_half(); identical to storeTopHalf except for
// the half that is selected. The current predicate state is left untouched.
void GenContext::storeBottomHalf(GenRegister dest, GenRegister src) {
  int execWidth = p->curr.execWidth;   // width in effect before the push
  dest = dest.bottom_half();
  p->push();
    p->curr.execWidth = 8;
    p->MOV(dest, src);
    p->curr.nibControl = 1;
    p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
    if (execWidth == 16) {
      p->curr.quarterControl = 1;
      p->curr.nibControl = 0;
      p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
      p->curr.nibControl = 1;
      p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
    }
  p->pop();
}

// Emits dest = src0 + src1 with ADDC and then copies the accumulator (acc0,
// viewed as D) into src1, so src1 is CLOBBERED by every call.
// NOTE(review): acc0 presumably holds the carry-out of the ADDC -- confirm
// against the ISA reference.
// The work is done in width-8 pieces; a second piece at sub-offset 8 with
// quarterControl = 1 is emitted when the surrounding width is 16.
void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
  int execWidth = p->curr.execWidth;   // width in effect before the push
  GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
  p->push();
    p->curr.execWidth = 8;
    p->ADDC(dest, src0, src1);
    p->MOV(src1, acc0);
    if (execWidth == 16) {
      p->curr.quarterControl = 1;
      p->ADDC(GenRegister::suboffset(dest, 8),
              GenRegister::suboffset(src0, 8),
              GenRegister::suboffset(src1, 8));
      p->MOV(GenRegister::suboffset(src1, 8), acc0);
    }
  p->pop();
}

// Emits dest = src0 - src1 with SUBB and then copies the accumulator (acc0,
// viewed as D) into src1, so src1 is CLOBBERED by every call.
// NOTE(review): acc0 presumably holds the borrow of the SUBB -- confirm
// against the ISA reference.
// Structured exactly like addWithCarry.
void GenContext::subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1) {
  int execWidth = p->curr.execWidth;   // width in effect before the push
  GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
  p->push();
    p->curr.execWidth = 8;
    p->SUBB(dest, src0, src1);
    p->MOV(src1, acc0);
    if (execWidth == 16) {
      p->curr.quarterControl = 1;
      p->SUBB(GenRegister::suboffset(dest, 8),
              GenRegister::suboffset(src0, 8),
              GenRegister::suboffset(src1, 8));
      p->MOV(GenRegister::suboffset(src1, 8), acc0);
    }
  p->pop();
}

// Emits a full multiply of src0 by src1, producing two result words:
// for each group of 8 channels a MUL into the accumulator, a MACH into
// `high` (with accWrEnable set for that instruction only) and a MOV of the
// accumulator into `low`. All four registers advance by a sub-offset of 8
// between groups.
void GenContext::I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1) {
  GenRegister acc = GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD);
  int execWidth = p->curr.execWidth;   // width in effect before the push
  p->push();
  p->curr.execWidth = 8;
  for(int i = 0; i < execWidth; i += 8) {
    p->MUL(acc, src0, src1);
    p->curr.accWrEnable = 1;
    p->MACH(high, src0, src1);
    p->curr.accWrEnable = 0;
    p->MOV(low, acc);
    src0 = GenRegister::suboffset(src0, 8);
    src1 = GenRegister::suboffset(src1, 8);
    high = GenRegister::suboffset(high, 8);
    low = GenRegister::suboffset(low, 8);
  }
  p->pop();
}

// Emits a 64-bit multiply whose result keeps two words.
// With x = (a,b) and y = (c,d) as (top, bottom) halves:
//   top    = low(b*c) + low(a*d) + high(b*d)
//   bottom = low(b*d)
// The high words of the two cross products are discarded by passing
// GenRegister::null() as the `high` destination.
// dst(1)..dst(6) are scratch registers a..f, all retyped to UD.
void GenContext::emitI64MULInstruction(const SelectionInstruction &insn) {
  GenRegister dest = ra->genReg(insn.dst(0));
  GenRegister x = ra->genReg(insn.src(0));
  GenRegister y = ra->genReg(insn.src(1));
  GenRegister a = ra->genReg(insn.dst(1));
  GenRegister b = ra->genReg(insn.dst(2));
  GenRegister c = ra->genReg(insn.dst(3));
  GenRegister d = ra->genReg(insn.dst(4));
  GenRegister e = ra->genReg(insn.dst(5));
  GenRegister f = ra->genReg(insn.dst(6));
  a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
  loadTopHalf(a, x);
  loadBottomHalf(b, x);
  loadTopHalf(c, y);
  loadBottomHalf(d, y);
  p->push();
    p->curr.predicate = GEN_PREDICATE_NONE;
    I32FullMult(GenRegister::null(), e, b, c);   // e <- low(b*c)
    I32FullMult(GenRegister::null(), f, a, d);   // f <- low(a*d)
    p->ADD(e, e, f);
    I32FullMult(f, a, b, d);                     // (f,a) <- b*d
    p->ADD(e, e, f);
  p->pop();
  storeTopHalf(dest, e);
  storeBottomHalf(dest, a);
}

// Emits a 64-bit divide (SEL_OP_I64DIV) or remainder (SEL_OP_I64REM) as a
// 64-iteration shift-and-subtract loop closed by a backward JMPI.
// Register roles (dst(1)..dst(13) = a..m, dst(14) = flag register):
//   (a,b)      running remainder, initialised with x
//   (c,d,e,f)  four-word value initialised to (y, 0, 0), halved each iteration
//   (g,h)      current quotient bit, starts at 2**63 and is halved each time
//   (i,j)      quotient accumulator
//   k          sign marker for the result, l scratch, m iteration counter
// For signed operands the absolute values are divided and the sign is fixed
// up afterwards.
void GenContext::emitI64DIVREMInstruction(const SelectionInstruction &insn) {
  GenRegister dest = ra->genReg(insn.dst(0));
  GenRegister x = ra->genReg(insn.src(0));
  GenRegister y = ra->genReg(insn.src(1));
  GenRegister a = ra->genReg(insn.dst(1));
  GenRegister b = ra->genReg(insn.dst(2));
  GenRegister c = ra->genReg(insn.dst(3));
  GenRegister d = ra->genReg(insn.dst(4));
  GenRegister e = ra->genReg(insn.dst(5));
  GenRegister f = ra->genReg(insn.dst(6));
  GenRegister g = ra->genReg(insn.dst(7));
  GenRegister h = ra->genReg(insn.dst(8));
  GenRegister i = ra->genReg(insn.dst(9));
  GenRegister j = ra->genReg(insn.dst(10));
  GenRegister k = ra->genReg(insn.dst(11));
  GenRegister l = ra->genReg(insn.dst(12));
  GenRegister m = ra->genReg(insn.dst(13));
  GenRegister flagReg = ra->genReg(insn.dst(14));
  GenRegister zero = GenRegister::immud(0),
              one = GenRegister::immud(1),
              imm31 = GenRegister::immud(31);
  // (a,b) <- x
  loadTopHalf(a, x);
  loadBottomHalf(b, x);
  // (c,d) <- y
  loadTopHalf(c, y);
  loadBottomHalf(d, y);
  // k <- sign_of_result
  // (for DIV the two sign words are XOR-ed; for REM k stays the sign of x)
  if(x.is_signed_int()) {
    GBE_ASSERT(y.is_signed_int());
    GBE_ASSERT(dest.is_signed_int());
    I64ABS(k, a, b, e, flagReg);
    I64ABS(l, c, d, e, flagReg);
    if(insn.opcode == SEL_OP_I64DIV)
      p->XOR(k, k, l);
  }
  // (e,f) <- 0
  p->MOV(e, zero);
  p->MOV(f, zero);
  // (g,h) <- 2**63
  p->MOV(g, GenRegister::immud(0x80000000));
  p->MOV(h, zero);
  // (i,j) <- 0
  p->MOV(i, zero);
  p->MOV(j, zero);
  // m <- 0
  p->MOV(m, zero);
  {
    // Instruction index of the loop head; used to compute the backward jump.
    uint32_t loop_start = p->n_instruction();
    // (c,d,e,f) <- (c,d,e,f) / 2
    p->SHR(f, f, one);
    p->SHL(l, e, imm31);
    p->OR(f, f, l);
    p->SHR(e, e, one);
    p->SHL(l, d, imm31);
    p->OR(e, e, l);
    p->SHR(d, d, one);
    p->SHL(l, c, imm31);
    p->OR(d, d, l);
    p->SHR(c, c, one);
    // condition <- (c,d)==0 && (a,b)>=(e,f)
    // l is used as a marker for (a,b) >= (e,f): set when a == e and b >= f,
    // or when a > e. The flag is then narrowed by the c == 0 and d == 0 CMPs.
    p->push();
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->MOV(l, zero);
      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
      p->CMP(GEN_CONDITIONAL_EQ, a, e);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->CMP(GEN_CONDITIONAL_GE, b, f);
      p->MOV(l, one);
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_G, a, e);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->MOV(l, one);
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->CMP(GEN_CONDITIONAL_NEQ, l, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      p->CMP(GEN_CONDITIONAL_EQ, c, zero);
      p->CMP(GEN_CONDITIONAL_EQ, d, zero);
      // under condition, (a,b) <- (a,b) - (e,f)
      // (f and e are copied into l first because subWithBorrow overwrites its
      // last operand)
      p->MOV(l, f);
      subWithBorrow(b, b, l);
      subWithBorrow(a, a, l);
      p->MOV(l, e);
// Continuation of emitI64DIVREMInstruction (inside the loop body, still in
// the predicated push() section that subtracts (e,f) from (a,b)).
      subWithBorrow(a, a, l);
      // under condition, (i,j) <- (i,j) | (g,h)
      p->OR(i, i, g);
      p->OR(j, j, h);
    p->pop();
    // (g,h) /= 2
    p->SHR(h, h, one);
    p->SHL(l, g, imm31);
    p->OR(h, h, l);
    p->SHR(g, g, one);
    // condition: m < 64
    p->ADD(m, m, one);
    p->push();
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
      p->CMP(GEN_CONDITIONAL_L, m, GenRegister::immud(64));
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      // under condition, jump back to start point
      // The predicate is switched to the ANY8H / ANY16H form matching the
      // SIMD width before the width-1, noMask JMPI is emitted.
      if (simdWidth == 8)
        p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
      else if (simdWidth == 16)
        p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
      else
        NOT_IMPLEMENTED;
      p->curr.execWidth = 1;
      p->curr.noMask = 1;
      // Backward distance: instructions emitted since loop_start, plus one,
      // times two.
      // NOTE(review): the +1 and *2 presumably account for the JMPI itself
      // and for the jump-offset unit -- confirm against patchJMPI.
      int jip = -(int)(p->n_instruction() - loop_start + 1) * 2;
      p->JMPI(zero);
      p->patchJMPI(p->n_instruction()-1, jip);
    p->pop();
    // end of loop
  }
  // adjust sign of result
  // Where k != 0, negate the quotient (DIV) or the remainder (REM).
  if(x.is_signed_int()) {
    p->push();
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
      p->CMP(GEN_CONDITIONAL_NEQ, k, zero);
      p->curr.predicate = GEN_PREDICATE_NORMAL;
      if(insn.opcode == SEL_OP_I64DIV)
        I64Neg(i, j, l);
      else
        I64Neg(a, b, l);
    p->pop();
  }
  // write dest
  if(insn.opcode == SEL_OP_I64DIV) {
    storeTopHalf(dest, i);
    storeBottomHalf(dest, j);
  } else {
    GBE_ASSERT(insn.opcode == SEL_OP_I64REM);
    storeTopHalf(dest, a);
    storeBottomHalf(dest, b);
  }
}

// Emits a three-source instruction. Only SEL_OP_MAD is handled (forwarded to
// the encoder's MAD); any other opcode hits NOT_IMPLEMENTED.
void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
  const GenRegister dst = ra->genReg(insn.dst(0));
  const GenRegister src0 = ra->genReg(insn.src(0));
  const GenRegister src1 = ra->genReg(insn.src(1));
  const GenRegister src2 = ra->genReg(insn.src(2));
  switch (insn.opcode) {
    case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
    default: NOT_IMPLEMENTED;
  }
}

// No-op selection instructions are not supported by this backend path.
void GenContext::emitNoOpInstruction(const SelectionInstruction &insn) {
  NOT_IMPLEMENTED;
}

// Emits the encoder's WAIT; the selection instruction carries no operands
// that are used here.
void GenContext::emitWaitInstruction(const SelectionInstruction &insn) {
  p->WAIT();
}

// Emits the encoder's BARRIER with src(0) as its operand.
void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) {
const GenRegister src = ra->genReg(insn.src(0)); p->BARRIER(src); } void GenContext::emitFenceInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); p->FENCE(dst); p->MOV(dst, dst); } void GenContext::emitMathInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister src0 = ra->genReg(insn.src(0)); const uint32_t function = insn.extra.function; if (insn.srcNum == 2) { const GenRegister src1 = ra->genReg(insn.src(1)); p->MATH(dst, function, src0, src1); } else p->MATH(dst, function, src0); } void GenContext::emitCompareInstruction(const SelectionInstruction &insn) { const GenRegister src0 = ra->genReg(insn.src(0)); const GenRegister src1 = ra->genReg(insn.src(1)); if (insn.opcode == SEL_OP_CMP) p->CMP(insn.extra.function, src0, src1); else { GBE_ASSERT(insn.opcode == SEL_OP_SEL_CMP); const GenRegister dst = ra->genReg(insn.dst(0)); p->SEL_CMP(insn.extra.function, dst, src0, src1); } } void GenContext::emitAtomicInstruction(const SelectionInstruction &insn) { const GenRegister src = ra->genReg(insn.src(0)); const GenRegister dst = ra->genReg(insn.dst(0)); const uint32_t function = insn.extra.function; const uint32_t bti = insn.extra.elem; p->ATOMIC(dst, function, src, bti, insn.srcNum); } void GenContext::emitIndirectMoveInstruction(const SelectionInstruction &insn) { GenRegister src = ra->genReg(insn.src(0)); if(isScalarReg(src.reg())) src = GenRegister::retype(src, GEN_TYPE_UW); else src = GenRegister::unpacked_uw(src.nr, src.subnr / typeSize(GEN_TYPE_UW)); const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister a0 = GenRegister::addr8(0); uint32_t simdWidth = p->curr.execWidth; p->push(); p->curr.execWidth = 8; p->curr.quarterControl = GEN_COMPRESSION_Q1; p->MOV(a0, src); p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8)); p->pop(); if (simdWidth == 16) { p->push(); p->curr.execWidth = 8; p->curr.quarterControl = GEN_COMPRESSION_Q2; const 
GenRegister nextDst = GenRegister::Qn(dst, 1); const GenRegister nextSrc = GenRegister::Qn(src, 1); p->MOV(a0, nextSrc); p->MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8)); p->pop(); } } void GenContext::emitJumpInstruction(const SelectionInstruction &insn) { const ir::LabelIndex label(insn.index); const GenRegister src = ra->genReg(insn.src(0)); this->branchPos2.push_back(std::make_pair(label, p->store.size())); p->JMPI(src); } void GenContext::emitEotInstruction(const SelectionInstruction &insn) { p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; p->MOV(GenRegister::ud8grf(112, 0), GenRegister::ud8grf(0, 0)); p->curr.execWidth = 8; p->EOT(112); p->pop(); } void GenContext::emitSpillRegInstruction(const SelectionInstruction &insn) { uint32_t simdWidth = p->curr.execWidth; uint32_t scratchOffset = insn.extra.scratchOffset; const uint32_t header = insn.extra.scratchMsgHeader; p->push(); const GenRegister msg = GenRegister::ud8grf(header, 0); const GenRegister src = ra->genReg(insn.src(0)); GenRegister payload = src; payload.nr = header + 1; payload.subnr = 0; p->MOV(payload, src); uint32_t regType = insn.src(0).type; uint32_t size = typeSize(regType); assert(size <= 4); uint32_t regNum = (stride(src.hstride)*size*simdWidth) > 32 ? 2 : 1; this->scratchWrite(msg, scratchOffset, regNum, regType, GEN_SCRATCH_CHANNEL_MODE_DWORD); p->pop(); } void GenContext::emitUnSpillRegInstruction(const SelectionInstruction &insn) { uint32_t scratchOffset = insn.extra.scratchOffset; const GenRegister dst = insn.dst(0); uint32_t regType = dst.type; uint32_t simdWidth = p->curr.execWidth; const uint32_t header = insn.extra.scratchMsgHeader; uint32_t size = typeSize(regType); assert(size <= 4); uint32_t regNum = (stride(dst.hstride)*size*simdWidth) > 32 ? 
2 : 1; const GenRegister msg = GenRegister::ud8grf(header, 0); this->scratchRead(GenRegister::retype(dst, GEN_TYPE_UD), msg, scratchOffset, regNum, regType, GEN_SCRATCH_CHANNEL_MODE_DWORD); } // For SIMD8, we allocate 2*elemNum temporary registers from dst(0), and // then follow the real destination registers. // For SIMD16, we allocate elemNum temporary registers from dst(0). void GenContext::emitRead64Instruction(const SelectionInstruction &insn) { const uint32_t elemNum = insn.extra.elem; const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum; const GenRegister tempAddr = ra->genReg(insn.dst(0)); const GenRegister dst = ra->genReg(insn.dst(tmpRegSize + 1)); const GenRegister tmp = ra->genReg(insn.dst(1)); const GenRegister src = ra->genReg(insn.src(0)); const uint32_t bti = insn.extra.function; p->READ64(dst, tmp, tempAddr, src, bti, elemNum); } void GenContext::emitUntypedReadInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister src = ra->genReg(insn.src(0)); const uint32_t bti = insn.extra.function; const uint32_t elemNum = insn.extra.elem; p->UNTYPED_READ(dst, src, bti, elemNum); } // For SIMD8, we allocate 2*elemNum temporary registers from dst(0), and // then follow the real destination registers. // For SIMD16, we allocate elemNum temporary registers from dst(0). 
void GenContext::emitWrite64Instruction(const SelectionInstruction &insn) { const GenRegister src = ra->genReg(insn.dst(0)); const uint32_t elemNum = insn.extra.elem; const GenRegister addr = ra->genReg(insn.src(0)); //tmpRegSize + 1)); const GenRegister data = ra->genReg(insn.src(1)); const uint32_t bti = insn.extra.function; p->MOV(src, addr); p->WRITE64(src, data, bti, elemNum, isScalarReg(data.reg())); } void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) { const GenRegister src = ra->genReg(insn.src(0)); const uint32_t bti = insn.extra.function; const uint32_t elemNum = insn.extra.elem; p->UNTYPED_WRITE(src, bti, elemNum); } void GenContext::emitByteGatherInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister src = ra->genReg(insn.src(0)); const uint32_t bti = insn.extra.function; const uint32_t elemSize = insn.extra.elem; p->BYTE_GATHER(dst, src, bti, elemSize); } void GenContext::emitByteScatterInstruction(const SelectionInstruction &insn) { const GenRegister src = ra->genReg(insn.src(0)); const uint32_t bti = insn.extra.function; const uint32_t elemSize = insn.extra.elem; p->BYTE_SCATTER(src, bti, elemSize); } void GenContext::emitDWordGatherInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister src = ra->genReg(insn.src(0)); const uint32_t bti = insn.extra.function; p->DWORD_GATHER(dst, src, bti); } void GenContext::emitSampleInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister msgPayload = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_F); const unsigned char bti = insn.extra.function; const unsigned char sampler = insn.extra.elem; const GenRegister ucoord = ra->genReg(insn.src(4)); const GenRegister vcoord = ra->genReg(insn.src(5)); const GenRegister wcoord = ra->genReg(insn.src(6)); uint32_t simdWidth = p->curr.execWidth; uint32_t coord_cnt 
= 2; p->push(); const uint32_t nr = msgPayload.nr; // prepare mesg desc and move to a0.0. // desc = bti | (sampler << 8) | (0 << 12) | (2 << 16) | (0 << 18) | (0 << 19) | (4 << 20) | (1 << 25) | (0 < 29) | (0 << 31) /* Prepare message payload. */ p->MOV(GenRegister::f8grf(nr , 0), ucoord); p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord); if (insn.src(6).reg() != 0) { p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord); coord_cnt++; } p->SAMPLE(dst, msgPayload, false, bti, sampler, coord_cnt, simdWidth, -1, 0); p->pop(); } void GenContext::scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) { p->push(); uint32_t simdWidth = p->curr.execWidth; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; p->curr.execWidth = 8; p->MOV(header, GenRegister::ud8grf(0,0)); p->pop(); int size = typeSize(reg_type)*simdWidth; p->push(); p->SCRATCH_WRITE(header, offset/32, size, reg_num, channel_mode); p->pop(); } void GenContext::scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) { p->push(); uint32_t simdWidth = p->curr.execWidth; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; p->curr.execWidth = 8; p->MOV(header, GenRegister::ud8grf(0,0)); p->pop(); int size = typeSize(reg_type)*simdWidth; p->push(); p->SCRATCH_READ(dst, header, offset/32, size, reg_num, channel_mode); p->pop(); } void GenContext::emitTypedWriteInstruction(const SelectionInstruction &insn) { const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_UD); const GenRegister ucoord = ra->genReg(insn.src(insn.extra.elem)); const GenRegister vcoord = ra->genReg(insn.src(1 + insn.extra.elem)); const GenRegister wcoord = ra->genReg(insn.src(2 + insn.extra.elem)); const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem)); const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem)); const GenRegister B = 
ra->genReg(insn.src(5 + insn.extra.elem)); const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem)); const unsigned char bti = insn.extra.function; p->push(); uint32_t simdWidth = p->curr.execWidth; const uint32_t nr = header.nr; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; p->MOV(header, GenRegister::immud(0x0)); p->curr.execWidth = 1; // prepare mesg desc and move to a0.0. // desc = bti | (msg_type << 14) | (header_present << 19)) // prepare header, we need to enable all the 8 planes. p->MOV(GenRegister::ud8grf(nr, 7), GenRegister::immud(0xffff)); p->curr.execWidth = 8; // Typed write only support SIMD8. // Prepare message payload U + V + R(ignored) + LOD(0) + RGBA. // Currently, we don't support non-zero lod, so we clear all lod to // zero for both quarters thus save one instruction here. // Thus we must put this instruction in noMask and no predication state. p->MOV(GenRegister::ud8grf(nr + 4, 0), GenRegister::immud(0)); //LOD p->pop(); p->push(); p->curr.execWidth = 8; // TYPED WRITE send instruction only support SIMD8, if we are SIMD16, we // need to call it twice. uint32_t quarterNum = (simdWidth == 8) ? 
1 : 2; for( uint32_t quarter = 0; quarter < quarterNum; quarter++) { #define QUARTER_MOV0(dst_nr, src) p->MOV(GenRegister::ud8grf(dst_nr, 0), \ GenRegister::retype(GenRegister::QnPhysical(src, quarter), src.type)) #define QUARTER_MOV1(dst_nr, src) p->MOV(GenRegister::retype(GenRegister::ud8grf(dst_nr, 0), src.type), \ GenRegister::retype(GenRegister::QnPhysical(src,quarter), src.type)) if (quarter == 1) p->curr.quarterControl = GEN_COMPRESSION_Q2; QUARTER_MOV0(nr + 1, ucoord); QUARTER_MOV0(nr + 2, vcoord); if (insn.src(2 + insn.extra.elem).reg() != 0) QUARTER_MOV0(nr + 3, wcoord); QUARTER_MOV1(nr + 5, R); QUARTER_MOV1(nr + 6, G); QUARTER_MOV1(nr + 7, B); QUARTER_MOV1(nr + 8, A); #undef QUARTER_MOV p->TYPED_WRITE(header, true, bti); } p->pop(); } void GenContext::emitGetImageInfoInstruction(const SelectionInstruction &insn) { const unsigned char bti = insn.extra.function; const unsigned char type = insn.extra.elem; const uint32_t dstNum = ir::GetImageInfoInstruction::getDstNum4Type(type); ir::ImageInfoKey key; key.index = bti; key.type = type; uint32_t offset = this->getImageInfoCurbeOffset(key, dstNum * 4) + GEN_REG_SIZE; for(uint32_t i = 0; i < dstNum; i++) { const uint32_t nr = offset / GEN_REG_SIZE; const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t); p->MOV(ra->genReg(insn.dst(i)), GenRegister::ud1grf(nr, subnr)); offset += 32; } } BVAR(OCL_OUTPUT_REG_ALLOC, false); BVAR(OCL_OUTPUT_ASM, false); bool GenContext::emitCode(void) { GenKernel *genKernel = static_cast(this->kernel); sel->select(); schedulePreRegAllocation(*this, *this->sel); if (UNLIKELY(ra->allocate(*this->sel) == false)) return false; schedulePostRegAllocation(*this, *this->sel); if (OCL_OUTPUT_REG_ALLOC) ra->outputAllocation(); this->clearFlagRegister(); this->emitStackPointer(); this->emitInstructionStream(); this->patchBranches(); genKernel->insnNum = p->store.size(); genKernel->insns = GBE_NEW_ARRAY_NO_ARG(GenInstruction, genKernel->insnNum); std::memcpy(genKernel->insns, 
&p->store[0], genKernel->insnNum * sizeof(GenInstruction)); if (OCL_OUTPUT_ASM) for (uint32_t insnID = 0; insnID < genKernel->insnNum; ++insnID) gen_disasm(stdout, &p->store[insnID]); return true; } Kernel *GenContext::allocateKernel(void) { return GBE_NEW(GenKernel, name); } } /* namespace gbe */ Release_v0.3/backend/src/backend/gen_context.hpp000066400000000000000000000170001223142177000217740ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_context.hpp * \author Benjamin Segovia */ #ifndef __GBE_GEN_CONTEXT_HPP__ #define __GBE_GEN_CONTEXT_HPP__ #include "backend/context.hpp" #include "backend/program.h" #include "backend/gen_register.hpp" #include "ir/function.hpp" #include "ir/liveness.hpp" #include "sys/map.hpp" #include namespace gbe { class Kernel; // We build this structure class GenEncoder; // Helps emitting Gen ISA class GenRegAllocator; // Handle the register allocation class Selection; // Performs instruction selection class SelectionInstruction; // Pre-RA Gen instruction class SelectionReg; // Pre-RA Gen register class GenRegister; /*! Context is the helper structure to build the Gen ISA or simulation code * from GenIR */ class GenContext : public Context { public: /*! Create a new context. 
name is the name of the function we want to * compile */ GenContext(const ir::Unit &unit, const std::string &name, bool limitRegisterPressure = false); /*! Release everything needed */ ~GenContext(void); /*! Implements base class */ virtual bool emitCode(void); /*! Function we emit code for */ INLINE const ir::Function &getFunction(void) const { return fn; } /*! Simd width chosen for the current function */ INLINE uint32_t getSimdWidth(void) const { return simdWidth; } void clearFlagRegister(void); /*! Emit the per-lane stack pointer computation */ void emitStackPointer(void); /*! Emit the instructions */ void emitInstructionStream(void); /*! Set the correct target values for the branches */ void patchBranches(void); /*! Forward ir::Function isSpecialReg method */ INLINE bool isSpecialReg(ir::Register reg) const { return fn.isSpecialReg(reg); } /*! Get the liveOut information for the given block */ INLINE const ir::Liveness::LiveOut &getLiveOut(const ir::BasicBlock *bb) const { return this->liveness->getLiveOut(bb); } void collectShifter(GenRegister dest, GenRegister src); void loadTopHalf(GenRegister dest, GenRegister src); void storeTopHalf(GenRegister dest, GenRegister src); void loadBottomHalf(GenRegister dest, GenRegister src); void storeBottomHalf(GenRegister dest, GenRegister src); void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1); void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1); void I64Neg(GenRegister high, GenRegister low, GenRegister tmp); void I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg); void I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2); void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1); void I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low); void saveFlag(GenRegister 
dest, int flag, int subFlag); void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp); /*! Final Gen ISA emission helper functions */ void emitLabelInstruction(const SelectionInstruction &insn); void emitUnaryInstruction(const SelectionInstruction &insn); void emitUnaryWithTempInstruction(const SelectionInstruction &insn); void emitBinaryInstruction(const SelectionInstruction &insn); void emitBinaryWithTempInstruction(const SelectionInstruction &insn); void emitTernaryInstruction(const SelectionInstruction &insn); void emitI64MULHIInstruction(const SelectionInstruction &insn); void emitI64MADSATInstruction(const SelectionInstruction &insn); void emitI64HADDInstruction(const SelectionInstruction &insn); void emitI64RHADDInstruction(const SelectionInstruction &insn); void emitI64ShiftInstruction(const SelectionInstruction &insn); void emitI64CompareInstruction(const SelectionInstruction &insn); void emitI64SATADDInstruction(const SelectionInstruction &insn); void emitI64SATSUBInstruction(const SelectionInstruction &insn); void emitI64ToFloatInstruction(const SelectionInstruction &insn); void emitCompareInstruction(const SelectionInstruction &insn); void emitJumpInstruction(const SelectionInstruction &insn); void emitIndirectMoveInstruction(const SelectionInstruction &insn); void emitEotInstruction(const SelectionInstruction &insn); void emitNoOpInstruction(const SelectionInstruction &insn); void emitWaitInstruction(const SelectionInstruction &insn); void emitBarrierInstruction(const SelectionInstruction &insn); void emitFenceInstruction(const SelectionInstruction &insn); void emitMathInstruction(const SelectionInstruction &insn); void emitRead64Instruction(const SelectionInstruction &insn); void emitWrite64Instruction(const SelectionInstruction &insn); void emitUntypedReadInstruction(const SelectionInstruction &insn); void emitUntypedWriteInstruction(const SelectionInstruction &insn); void emitAtomicInstruction(const 
SelectionInstruction &insn); void emitByteGatherInstruction(const SelectionInstruction &insn); void emitByteScatterInstruction(const SelectionInstruction &insn); void emitDWordGatherInstruction(const SelectionInstruction &insn); void emitSampleInstruction(const SelectionInstruction &insn); void emitTypedWriteInstruction(const SelectionInstruction &insn); void emitSpillRegInstruction(const SelectionInstruction &insn); void emitUnSpillRegInstruction(const SelectionInstruction &insn); void emitGetImageInfoInstruction(const SelectionInstruction &insn); void emitI64MULInstruction(const SelectionInstruction &insn); void emitI64DIVREMInstruction(const SelectionInstruction &insn); void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); /*! Implements base class */ virtual Kernel *allocateKernel(void); /*! Store the position of each label instruction in the Gen ISA stream */ map labelPos; /*! Store the Gen instructions to patch */ vector> branchPos2; /*! Encode Gen ISA */ GenEncoder *p; /*! Instruction selection on Gen ISA (pre-register allocation) */ Selection *sel; /*! Perform the register allocation */ GenRegAllocator *ra; /*! Indicate if we need to tackle a register pressure issue when * regenerating the code */ bool limitRegisterPressure; }; } /* namespace gbe */ #endif /* __GBE_GEN_CONTEXT_HPP__ */ Release_v0.3/backend/src/backend/gen_defs.hpp000066400000000000000000000652071223142177000212450ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/ /* * Authors: * Keith Whitwell */ #ifndef __GEN_DEFS_HPP__ #define __GEN_DEFS_HPP__ #include ///////////////////////////////////////////////////////////////////////////// // Gen EU defines ///////////////////////////////////////////////////////////////////////////// /* Execution Unit (EU) defines */ #define GEN_ALIGN_1 0 #define GEN_ALIGN_16 1 #define GEN_REG_SIZE 32 #define GEN_ADDRESS_DIRECT 0 #define GEN_ADDRESS_REGISTER_INDIRECT_REGISTER 1 #define GEN_CHANNEL_X 0 #define GEN_CHANNEL_Y 1 #define GEN_CHANNEL_Z 2 #define GEN_CHANNEL_W 3 #define GEN_COMPRESSION_Q1 0 #define GEN_COMPRESSION_Q2 1 #define GEN_COMPRESSION_Q3 2 #define GEN_COMPRESSION_Q4 3 #define GEN_COMPRESSION_H1 0 #define GEN_COMPRESSION_H2 2 #define GEN_CONDITIONAL_NONE 0 #define GEN_CONDITIONAL_Z 1 #define GEN_CONDITIONAL_NZ 2 #define GEN_CONDITIONAL_EQ 1 /* Z */ #define GEN_CONDITIONAL_NEQ 2 /* NZ */ #define GEN_CONDITIONAL_G 3 #define GEN_CONDITIONAL_GE 4 #define GEN_CONDITIONAL_L 5 #define GEN_CONDITIONAL_LE 6 #define GEN_CONDITIONAL_R 7 #define GEN_CONDITIONAL_O 8 #define GEN_CONDITIONAL_U 9 #define GEN_DEBUG_NONE 0 #define GEN_DEBUG_BREAKPOINT 1 #define GEN_DEPENDENCY_NORMAL 0 #define GEN_DEPENDENCY_NOTCLEARED 1 #define GEN_DEPENDENCY_NOTCHECKED 2 #define GEN_DEPENDENCY_DISABLE 3 #define GEN_HORIZONTAL_STRIDE_0 0 #define GEN_HORIZONTAL_STRIDE_1 1 #define GEN_HORIZONTAL_STRIDE_2 2 #define GEN_HORIZONTAL_STRIDE_4 3 #define GEN_INSTRUCTION_NORMAL 0 #define GEN_INSTRUCTION_SATURATE 1 #define GEN_MASK_ENABLE 0 #define GEN_MASK_DISABLE 1 /*! 
Gen opcode */ enum opcode { GEN_OPCODE_MOV = 1, GEN_OPCODE_SEL = 2, GEN_OPCODE_NOT = 4, GEN_OPCODE_AND = 5, GEN_OPCODE_OR = 6, GEN_OPCODE_XOR = 7, GEN_OPCODE_SHR = 8, GEN_OPCODE_SHL = 9, GEN_OPCODE_RSR = 10, GEN_OPCODE_RSL = 11, GEN_OPCODE_ASR = 12, GEN_OPCODE_CMP = 16, GEN_OPCODE_CMPN = 17, GEN_OPCODE_JMPI = 32, GEN_OPCODE_IF = 34, GEN_OPCODE_IFF = 35, GEN_OPCODE_ELSE = 36, GEN_OPCODE_ENDIF = 37, GEN_OPCODE_DO = 38, GEN_OPCODE_WHILE = 39, GEN_OPCODE_BREAK = 40, GEN_OPCODE_CONTINUE = 41, GEN_OPCODE_HALT = 42, GEN_OPCODE_MSAVE = 44, GEN_OPCODE_MRESTORE = 45, GEN_OPCODE_PUSH = 46, GEN_OPCODE_POP = 47, GEN_OPCODE_WAIT = 48, GEN_OPCODE_SEND = 49, GEN_OPCODE_SENDC = 50, GEN_OPCODE_MATH = 56, GEN_OPCODE_ADD = 64, GEN_OPCODE_MUL = 65, GEN_OPCODE_AVG = 66, GEN_OPCODE_FRC = 67, GEN_OPCODE_RNDU = 68, GEN_OPCODE_RNDD = 69, GEN_OPCODE_RNDE = 70, GEN_OPCODE_RNDZ = 71, GEN_OPCODE_MAC = 72, GEN_OPCODE_MACH = 73, GEN_OPCODE_LZD = 74, GEN_OPCODE_FBH = 75, GEN_OPCODE_FBL = 76, GEN_OPCODE_ADDC = 78, GEN_OPCODE_SUBB = 79, GEN_OPCODE_SAD2 = 80, GEN_OPCODE_SADA2 = 81, GEN_OPCODE_DP4 = 84, GEN_OPCODE_DPH = 85, GEN_OPCODE_DP3 = 86, GEN_OPCODE_DP2 = 87, GEN_OPCODE_DPA2 = 88, GEN_OPCODE_LINE = 89, GEN_OPCODE_PLN = 90, GEN_OPCODE_MAD = 91, GEN_OPCODE_NOP = 126, }; #define GEN_ATOMIC_SIMD16 0 #define GEN_ATOMIC_SIMD8 1 enum GenAtomicOpCode { GEN_ATOMIC_OP_CMPWR8B = 0, GEN_ATOMIC_OP_AND = 1, GEN_ATOMIC_OP_OR = 2, GEN_ATOMIC_OP_XOR = 3, GEN_ATOMIC_OP_MOV = 4, GEN_ATOMIC_OP_INC = 5, GEN_ATOMIC_OP_DEC = 6, GEN_ATOMIC_OP_ADD = 7, GEN_ATOMIC_OP_SUB = 8, GEN_ATOMIC_OP_REVSUB = 9, GEN_ATOMIC_OP_IMAX = 10, GEN_ATOMIC_OP_IMIN = 11, GEN_ATOMIC_OP_UMAX = 12, GEN_ATOMIC_OP_UMIN = 13, GEN_ATOMIC_OP_CMPWR = 14, GEN_ATOMIC_OP_PREDEC = 15 }; /*! 
Gen SFID */ enum GenMessageTarget { GEN_SFID_NULL = 0, GEN_SFID_MATH = 1, GEN_SFID_SAMPLER = 2, GEN_SFID_MESSAGE_GATEWAY = 3, GEN_SFID_DATAPORT_READ = 4, GEN_SFID_DATAPORT_WRITE = 5, GEN_SFID_URB = 6, GEN_SFID_THREAD_SPAWNER = 7, GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, GEN6_SFID_DATAPORT_RENDER_CACHE = 5, GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, GEN_SFID_DATAPORT_DATA_CACHE = 10, }; #define GEN_PREDICATE_NONE 0 #define GEN_PREDICATE_NORMAL 1 #define GEN_PREDICATE_ALIGN1_ANYV 2 #define GEN_PREDICATE_ALIGN1_ALLV 3 #define GEN_PREDICATE_ALIGN1_ANY2H 4 #define GEN_PREDICATE_ALIGN1_ALL2H 5 #define GEN_PREDICATE_ALIGN1_ANY4H 6 #define GEN_PREDICATE_ALIGN1_ALL4H 7 #define GEN_PREDICATE_ALIGN1_ANY8H 8 #define GEN_PREDICATE_ALIGN1_ALL8H 9 #define GEN_PREDICATE_ALIGN1_ANY16H 10 #define GEN_PREDICATE_ALIGN1_ALL16H 11 #define GEN_PREDICATE_ALIGN16_REPLICATE_X 2 #define GEN_PREDICATE_ALIGN16_REPLICATE_Y 3 #define GEN_PREDICATE_ALIGN16_REPLICATE_Z 4 #define GEN_PREDICATE_ALIGN16_REPLICATE_W 5 #define GEN_PREDICATE_ALIGN16_ANY4H 6 #define GEN_PREDICATE_ALIGN16_ALL4H 7 #define GEN_ARCHITECTURE_REGISTER_FILE 0 #define GEN_GENERAL_REGISTER_FILE 1 #define GEN_IMMEDIATE_VALUE 3 #define GEN_TYPE_UD 0 #define GEN_TYPE_D 1 #define GEN_TYPE_UW 2 #define GEN_TYPE_W 3 #define GEN_TYPE_UB 4 #define GEN_TYPE_B 5 #define GEN_TYPE_VF 5 /* packed float vector, immediates only? 
*/ #define GEN_TYPE_HF 6 #define GEN_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ #define GEN_TYPE_DF 6 #define GEN_TYPE_F 7 #define GEN_TYPE_UL 8 #define GEN_TYPE_L 9 #define GEN_ARF_NULL 0x00 #define GEN_ARF_ADDRESS 0x10 #define GEN_ARF_ACCUMULATOR 0x20 #define GEN_ARF_FLAG 0x30 #define GEN_ARF_MASK 0x40 #define GEN_ARF_MASK_STACK 0x50 #define GEN_ARF_MASK_STACK_DEPTH 0x60 #define GEN_ARF_STATE 0x70 #define GEN_ARF_CONTROL 0x80 #define GEN_ARF_NOTIFICATION_COUNT 0x90 #define GEN_ARF_IP 0xA0 #define GEN_MRF_COMPR4 (1 << 7) #define GEN_AMASK 0 #define GEN_IMASK 1 #define GEN_LMASK 2 #define GEN_CMASK 3 #define GEN_THREAD_NORMAL 0 #define GEN_THREAD_ATOMIC 1 #define GEN_THREAD_SWITCH 2 #define GEN_VERTICAL_STRIDE_0 0 #define GEN_VERTICAL_STRIDE_1 1 #define GEN_VERTICAL_STRIDE_2 2 #define GEN_VERTICAL_STRIDE_4 3 #define GEN_VERTICAL_STRIDE_8 4 #define GEN_VERTICAL_STRIDE_16 5 #define GEN_VERTICAL_STRIDE_32 6 #define GEN_VERTICAL_STRIDE_64 7 #define GEN_VERTICAL_STRIDE_128 8 #define GEN_VERTICAL_STRIDE_256 9 #define GEN_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF /* Execution width */ #define GEN_WIDTH_1 0 #define GEN_WIDTH_2 1 #define GEN_WIDTH_4 2 #define GEN_WIDTH_8 3 #define GEN_WIDTH_16 4 #define GEN_WIDTH_32 5 /* Channels to enable for the untyped reads and writes */ #define GEN_UNTYPED_RED (1 << 0) #define GEN_UNTYPED_GREEN (1 << 1) #define GEN_UNTYPED_BLUE (1 << 2) #define GEN_UNTYPED_ALPHA (1 << 3) /* SIMD mode for untyped reads and writes */ #define GEN_UNTYPED_SIMD4x2 0 #define GEN_UNTYPED_SIMD16 1 #define GEN_UNTYPED_SIMD8 2 /* SIMD mode for byte scatters / gathers */ #define GEN_BYTE_SCATTER_SIMD8 0 #define GEN_BYTE_SCATTER_SIMD16 1 /* Data port message type*/ #define GEN_OBLOCK_READ 0 //0000: OWord Block Read #define GEN_UNALIGNED_OBLOCK_READ 1 //0001: Unaligned OWord Block Read #define GEN_ODBLOCK_READ 2 //0010: OWord Dual Block Read #define GEN_DWORD_GATHER 3 //0011: DWord Scattered Read #define GEN_BYTE_GATHER 4 //0100: Byte Scattered 
Read #define GEN_UNTYPED_READ 5 //0101: Untyped Surface Read #define GEN_UNTYPED_ATOMIC_READ 6 //0110: Untyped Atomic Operation #define GEN_MEMORY_FENCE 7 //0111: Memory Fence #define GEN_OBLOCK_WRITE 8 //1000: OWord Block Write #define GEN_ODBLOCK_WRITE 10//1010: OWord Dual Block Write #define GEN_DWORD_SCATTER 11//1011: DWord Scattered Write #define GEN_BYTE_SCATTER 12//1100: Byte Scattered Write #define GEN_UNTYPED_WRITE 13//1101: Untyped Surface Write /* Data port data cache scratch messages*/ #define GEN_SCRATCH_READ 0 #define GEN_SCRATCH_WRITE 1 #define GEN_SCRATCH_CHANNEL_MODE_OWORD 0 #define GEN_SCRATCH_CHANNEL_MODE_DWORD 1 #define GEN_SCRATCH_BLOCK_SIZE_1 0 #define GEN_SCRATCH_BLOCK_SIZE_2 1 #define GEN_SCRATCH_BLOCK_SIZE_4 3 /* Data port render cache Message Type*/ #define GEN_MBLOCK_READ 4 //0100: Media Block Read #define GEN_TYPED_READ 5 //0101: Typed Surface Read #define GEN_TYPED_ATOMIC 6 //0110: Typed Atomic Operation #define GEN_MEM_FENCE 7 //0111: Memory Fence #define GEN_MBLOCK_WRITE 10 //1010: Media Block Write #define GEN_RENDER_WRITE 12 //1100: Render Target Write #define GEN_TYPED_WRITE 13 //1101: Typed Surface Write /* For byte scatters and gathers, the element to write */ #define GEN_BYTE_SCATTER_BYTE 0 #define GEN_BYTE_SCATTER_WORD 1 #define GEN_BYTE_SCATTER_DWORD 2 #define GEN_BYTE_SCATTER_QWORD 3 /* dword scattered rw */ #define GEN_DWORD_SCATTER_8_DWORDS 2 #define GEN_DWORD_SCATTER_16_DWORDS 3 #define GEN_SAMPLER_RETURN_FORMAT_FLOAT32 0 #define GEN_SAMPLER_RETURN_FORMAT_UINT32 2 #define GEN_SAMPLER_RETURN_FORMAT_SINT32 3 #define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 #define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 #define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 #define GEN_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 #define GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 #define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 #define GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 #define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 #define 
GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 #define GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 #define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 #define GEN_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1 #define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 #define GEN_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 #define GEN_SAMPLER_MESSAGE_SIMD16_RESINFO 2 #define GEN_SAMPLER_MESSAGE_SIMD4X2_LD 3 #define GEN_SAMPLER_MESSAGE_SIMD8_LD 3 #define GEN_SAMPLER_MESSAGE_SIMD16_LD 3 #define GEN5_SAMPLER_MESSAGE_SAMPLE 0 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 #define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 #define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 /* for GEN5 only */ #define GEN_SAMPLER_SIMD_MODE_SIMD4X2 0 #define GEN_SAMPLER_SIMD_MODE_SIMD8 1 #define GEN_SAMPLER_SIMD_MODE_SIMD16 2 #define GEN_SAMPLER_SIMD_MODE_SIMD32_64 3 #define GEN_MATH_FUNCTION_INV 1 #define GEN_MATH_FUNCTION_LOG 2 #define GEN_MATH_FUNCTION_EXP 3 #define GEN_MATH_FUNCTION_SQRT 4 #define GEN_MATH_FUNCTION_RSQ 5 #define GEN_MATH_FUNCTION_SIN 6 /* was 7 */ #define GEN_MATH_FUNCTION_COS 7 /* was 8 */ #define GEN_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define GEN_MATH_FUNCTION_POW 10 #define GEN_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define GEN_MATH_FUNCTION_INT_DIV_QUOTIENT 12 #define GEN_MATH_FUNCTION_INT_DIV_REMAINDER 13 #define GEN_MATH_INTEGER_UNSIGNED 0 #define GEN_MATH_INTEGER_SIGNED 1 #define GEN_MATH_PRECISION_FULL 0 #define GEN_MATH_PRECISION_PARTIAL 1 #define GEN_MATH_SATURATE_NONE 0 #define GEN_MATH_SATURATE_SATURATE 1 #define GEN_MATH_DATA_VECTOR 0 #define GEN_MATH_DATA_SCALAR 1 #define GEN_DEREFERENCE_URB 0 #define GEN_DO_NOT_DEREFERENCE_URB 1 #define GEN_MAX_NUM_BUFFER_ENTRIES (1 << 27) /* Message gateway */ #define GEN_OPEN_GATEWAY 0b000 #define 
GEN_CLOSE_GATEWAY 0b001 #define GEN_FORWARD_MSG 0b010 #define GEN_GET_TIME_STAMP 0b011 #define GEN_BARRIER_MSG 0b100 #define GEN_UPDATE_GATEWAT_STATE 0b101 #define GEN_MMIO_READ_WRITE 0b110 ///////////////////////////////////////////////////////////////////////////// // Gen EU structures ///////////////////////////////////////////////////////////////////////////// /** Number of general purpose registers (VS, WM, etc) */ #define GEN_MAX_GRF 128 /* Instruction format for the execution units */ struct GenInstruction { struct { uint32_t opcode:7; uint32_t pad:1; uint32_t access_mode:1; uint32_t mask_control:1; uint32_t dependency_control:2; uint32_t quarter_control:2; uint32_t thread_control:2; uint32_t predicate_control:4; uint32_t predicate_inverse:1; uint32_t execution_size:3; uint32_t destreg_or_condmod:4; uint32_t acc_wr_control:1; uint32_t cmpt_control:1; uint32_t debug_control:1; uint32_t saturate:1; } header; union { struct { uint32_t dest_reg_file:2; uint32_t dest_reg_type:3; uint32_t src0_reg_file:2; uint32_t src0_reg_type:3; uint32_t src1_reg_file:2; uint32_t src1_reg_type:3; uint32_t nib_ctrl:1; uint32_t dest_subreg_nr:5; uint32_t dest_reg_nr:8; uint32_t dest_horiz_stride:2; uint32_t dest_address_mode:1; } da1; struct { uint32_t dest_reg_file:2; uint32_t dest_reg_type:3; uint32_t src0_reg_file:2; uint32_t src0_reg_type:3; uint32_t src1_reg_file:2; /* 0x00000c00 */ uint32_t src1_reg_type:3; /* 0x00007000 */ uint32_t nib_ctrl:1; int dest_indirect_offset:10; /* offset against the deref'd address reg */ uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */ uint32_t dest_horiz_stride:2; uint32_t dest_address_mode:1; } ia1; struct { uint32_t dest_reg_file:2; uint32_t dest_reg_type:3; uint32_t src0_reg_file:2; uint32_t src0_reg_type:3; uint32_t src1_reg_file:2; uint32_t src1_reg_type:3; uint32_t nib_ctrl:1; uint32_t dest_writemask:4; uint32_t dest_subreg_nr:1; uint32_t dest_reg_nr:8; uint32_t dest_horiz_stride:2; uint32_t dest_address_mode:1; } da16; 
struct { uint32_t dest_reg_file:2; uint32_t dest_reg_type:3; uint32_t src0_reg_file:2; uint32_t src0_reg_type:3; uint32_t nib_ctrl:1; uint32_t dest_writemask:4; int dest_indirect_offset:6; uint32_t dest_subreg_nr:3; uint32_t dest_horiz_stride:2; uint32_t dest_address_mode:1; } ia16; struct { uint32_t dest_reg_file:2; uint32_t dest_reg_type:3; uint32_t src0_reg_file:2; uint32_t src0_reg_type:3; uint32_t src1_reg_file:2; uint32_t src1_reg_type:3; uint32_t pad:1; int jump_count:16; } branch_gen6; struct { uint32_t dest_reg_file:1; uint32_t flag_subreg_num:1; uint32_t pad0:2; uint32_t src0_abs:1; uint32_t src0_negate:1; uint32_t src1_abs:1; uint32_t src1_negate:1; uint32_t src2_abs:1; uint32_t src2_negate:1; uint32_t pad1:7; uint32_t dest_writemask:4; uint32_t dest_subreg_nr:3; uint32_t dest_reg_nr:8; } da3src; } bits1; union { struct { uint32_t src0_subreg_nr:5; uint32_t src0_reg_nr:8; uint32_t src0_abs:1; uint32_t src0_negate:1; uint32_t src0_address_mode:1; uint32_t src0_horiz_stride:2; uint32_t src0_width:3; uint32_t src0_vert_stride:4; uint32_t flag_sub_reg_nr:1; uint32_t flag_reg_nr:1; uint32_t pad:5; } da1; struct { int src0_indirect_offset:10; uint32_t src0_subreg_nr:3; uint32_t src0_abs:1; uint32_t src0_negate:1; uint32_t src0_address_mode:1; uint32_t src0_horiz_stride:2; uint32_t src0_width:3; uint32_t src0_vert_stride:4; uint32_t flag_sub_reg_nr:1; uint32_t flag_reg_nr:1; uint32_t pad:5; } ia1; struct { uint32_t src0_swz_x:2; uint32_t src0_swz_y:2; uint32_t src0_subreg_nr:1; uint32_t src0_reg_nr:8; uint32_t src0_abs:1; uint32_t src0_negate:1; uint32_t src0_address_mode:1; uint32_t src0_swz_z:2; uint32_t src0_swz_w:2; uint32_t pad0:1; uint32_t src0_vert_stride:4; uint32_t flag_sub_reg_nr:1; uint32_t flag_reg_nr:1; uint32_t pad:5; } da16; struct { uint32_t src0_swz_x:2; uint32_t src0_swz_y:2; int src0_indirect_offset:6; uint32_t src0_subreg_nr:3; uint32_t src0_abs:1; uint32_t src0_negate:1; uint32_t src0_address_mode:1; uint32_t src0_swz_z:2; uint32_t 
src0_swz_w:2; uint32_t pad0:1; uint32_t src0_vert_stride:4; uint32_t flag_sub_reg_nr:1; uint32_t flag_reg_nr:1; uint32_t pad:5; } ia16; struct { uint32_t src0_rep_ctrl:1; uint32_t src0_swizzle:8; uint32_t src0_subreg_nr:3; uint32_t src0_reg_nr:8; uint32_t pad0:1; uint32_t src1_rep_ctrl:1; uint32_t src1_swizzle:8; uint32_t src1_subreg_nr_low:2; } da3src; } bits2; union { struct { uint32_t src1_subreg_nr:5; uint32_t src1_reg_nr:8; uint32_t src1_abs:1; uint32_t src1_negate:1; uint32_t src1_address_mode:1; uint32_t src1_horiz_stride:2; uint32_t src1_width:3; uint32_t src1_vert_stride:4; uint32_t pad0:7; } da1; struct { uint32_t src1_swz_x:2; uint32_t src1_swz_y:2; uint32_t src1_subreg_nr:1; uint32_t src1_reg_nr:8; uint32_t src1_abs:1; uint32_t src1_negate:1; uint32_t src1_address_mode:1; uint32_t src1_swz_z:2; uint32_t src1_swz_w:2; uint32_t pad1:1; uint32_t src1_vert_stride:4; uint32_t pad2:7; } da16; struct { int src1_indirect_offset:10; uint32_t src1_subreg_nr:3; uint32_t src1_abs:1; uint32_t src1_negate:1; uint32_t src1_address_mode:1; uint32_t src1_horiz_stride:2; uint32_t src1_width:3; uint32_t src1_vert_stride:4; uint32_t pad1:7; } ia1; struct { uint32_t src1_swz_x:2; uint32_t src1_swz_y:2; int src1_indirect_offset:6; uint32_t src1_subreg_nr:3; uint32_t src1_abs:1; uint32_t src1_negate:1; uint32_t pad0:1; uint32_t src1_swz_z:2; uint32_t src1_swz_w:2; uint32_t pad1:1; uint32_t src1_vert_stride:4; uint32_t pad2:7; } ia16; struct { uint32_t function_control:19; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad1:2; uint32_t end_of_thread:1; } generic_gen5; struct { uint32_t sub_function_id:3; uint32_t pad0:11; uint32_t ack_req:1; uint32_t notify:2; uint32_t pad1:2; uint32_t header:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad2:2; uint32_t end_of_thread:1; } msg_gateway; struct { uint32_t opcode:1; uint32_t request:1; uint32_t pad0:2; uint32_t resource:1; uint32_t pad1:14; uint32_t header:1; uint32_t 
response_length:5; uint32_t msg_length:4; uint32_t pad2:2; uint32_t end_of_thread:1; } spawner_gen5; /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ struct { uint32_t function:4; uint32_t int_type:1; uint32_t precision:1; uint32_t saturate:1; uint32_t data_type:1; uint32_t snapshot:1; uint32_t pad0:10; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad1:2; uint32_t end_of_thread:1; } math_gen5; struct { uint32_t bti:8; uint32_t sampler:4; uint32_t msg_type:5; uint32_t simd_mode:2; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad1:2; uint32_t end_of_thread:1; } sampler_gen7; /** * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. * * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. **/ struct { uint32_t bti:8; uint32_t msg_control:5; uint32_t msg_type:3; uint32_t pad0:3; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad1:2; uint32_t end_of_thread:1; } gen6_dp_sampler_const_cache; /*! Data port untyped read / write messages */ struct { uint32_t bti:8; uint32_t rgba:4; uint32_t simd_mode:2; uint32_t msg_type:4; uint32_t category:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad2:2; uint32_t end_of_thread:1; } gen7_untyped_rw; /*! Data port byte scatter / gather */ struct { uint32_t bti:8; uint32_t simd_mode:1; uint32_t ignored0:1; uint32_t data_size:2; uint32_t ignored1:2; uint32_t msg_type:4; uint32_t category:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad2:2; uint32_t end_of_thread:1; } gen7_byte_rw; /*! 
Data port Scratch Read/ write */ struct { uint32_t offset:12; uint32_t block_size:2; uint32_t ignored0:1; uint32_t invalidate_after_read:1; uint32_t channel_mode:1; uint32_t msg_type:1; uint32_t category:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad2:2; uint32_t end_of_thread:1; } gen7_scratch_rw; /*! Data port OBlock read / write */ struct { uint32_t bti:8; uint32_t block_size:3; uint32_t ignored:2; uint32_t invalidate_after_read:1; uint32_t msg_type:4; uint32_t category:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad2:2; uint32_t end_of_thread:1; } gen7_oblock_rw; /*! Data port dword scatter / gather */ struct { uint32_t bti:8; uint32_t block_size:2; uint32_t ignored0:3; uint32_t invalidate_after_read:1; uint32_t msg_type:4; uint32_t ignored1:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad2:2; uint32_t end_of_thread:1; } gen7_dword_rw; /*! Data port typed read / write messages */ struct { uint32_t bti:8; uint32_t chan_mask:4; uint32_t pad:1; uint32_t slot:1; uint32_t msg_type:4; uint32_t pad2:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad3:2; uint32_t end_of_thread:1; } gen7_typed_rw; /*! Memory fence */ struct { uint32_t bti:8; uint32_t pad:5; uint32_t commit_enable:1; uint32_t msg_type:4; uint32_t pad2:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad3:2; uint32_t end_of_thread:1; } gen7_memory_fence; /*! 
atomic messages */ struct { uint32_t bti:8; uint32_t aop_type:4; uint32_t simd_mode:1; uint32_t return_data:1; uint32_t msg_type:4; uint32_t category:1; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad3:2; uint32_t end_of_thread:1; } gen7_atomic_op; struct { uint32_t src1_subreg_nr_high:1; uint32_t src1_reg_nr:8; uint32_t pad0:1; uint32_t src2_rep_ctrl:1; uint32_t src2_swizzle:8; uint32_t src2_subreg_nr:3; uint32_t src2_reg_nr:8; uint32_t pad1:2; } da3src; /*! Message gateway */ struct { uint32_t subfunc:3; uint32_t pad:11; uint32_t ackreq:1; uint32_t notify:2; uint32_t pad2:2; uint32_t header_present:1; uint32_t response_length:5; uint32_t msg_length:4; uint32_t pad3:2; uint32_t end_of_thread:1; } gen7_msg_gw; int d; uint32_t ud; float f; } bits3; }; #endif /* __GEN_DEFS_HPP__ */ Release_v0.3/backend/src/backend/gen_encoder.cpp000066400000000000000000001364161223142177000217370ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* Copyright (C) Intel Corp. 2006. All Rights Reserved. Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to develop this 3D driver. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
**********************************************************************/ /* * Authors: * Keith Whitwell */ #include "backend/gen_encoder.hpp" #include namespace gbe { ////////////////////////////////////////////////////////////////////////// // Some helper functions to encode ////////////////////////////////////////////////////////////////////////// INLINE bool isVectorOfBytes(GenRegister reg) { if (reg.hstride != GEN_HORIZONTAL_STRIDE_0 && (reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B)) return true; else return false; } INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) { if (p->curr.execWidth != 16) return false; if (isVectorOfBytes(dst) == true) return true; if (isVectorOfBytes(src) == true) return true; return false; } INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) { if (p->curr.execWidth != 16) return false; if (isVectorOfBytes(dst) == true) return true; if (isVectorOfBytes(src0) == true) return true; if (isVectorOfBytes(src1) == true) return true; return false; } INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) { if (p->curr.execWidth != 16) return false; if (isVectorOfBytes(src0) == true) return true; if (isVectorOfBytes(src1) == true) return true; if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F) return true; if (src1.type == GEN_TYPE_D || src1.type == GEN_TYPE_UD || src1.type == GEN_TYPE_F) return true; return false; } static void setMessageDescriptor(GenEncoder *p, GenInstruction *inst, enum GenMessageTarget sfid, unsigned msg_length, unsigned response_length, bool header_present = false, bool end_of_thread = false) { p->setSrc1(inst, GenRegister::immd(0)); inst->bits3.generic_gen5.header_present = header_present; inst->bits3.generic_gen5.response_length = response_length; inst->bits3.generic_gen5.msg_length = msg_length; inst->bits3.generic_gen5.end_of_thread = end_of_thread; inst->header.destreg_or_condmod = sfid; 
} static void setDPUntypedRW(GenEncoder *p, GenInstruction *insn, uint32_t bti, uint32_t rgba, uint32_t msg_type, uint32_t msg_length, uint32_t response_length) { const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE; setMessageDescriptor(p, insn, sfid, msg_length, response_length); insn->bits3.gen7_untyped_rw.msg_type = msg_type; insn->bits3.gen7_untyped_rw.bti = bti; insn->bits3.gen7_untyped_rw.rgba = rgba; if (p->curr.execWidth == 8) insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8; else if (p->curr.execWidth == 16) insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16; else NOT_SUPPORTED; } static void setDPByteScatterGather(GenEncoder *p, GenInstruction *insn, uint32_t bti, uint32_t elem_size, uint32_t msg_type, uint32_t msg_length, uint32_t response_length) { const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE; setMessageDescriptor(p, insn, sfid, msg_length, response_length); insn->bits3.gen7_byte_rw.msg_type = msg_type; insn->bits3.gen7_byte_rw.bti = bti; insn->bits3.gen7_byte_rw.data_size = elem_size; if (p->curr.execWidth == 8) insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD8; else if (p->curr.execWidth == 16) insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD16; else NOT_SUPPORTED; } #if 0 static void setOBlockRW(GenEncoder *p, GenInstruction *insn, uint32_t bti, uint32_t size, uint32_t msg_type, uint32_t msg_length, uint32_t response_length) { const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE; setMessageDescriptor(p, insn, sfid, msg_length, response_length); assert(size == 2 || size == 4); insn->bits3.gen7_oblock_rw.msg_type = msg_type; insn->bits3.gen7_oblock_rw.bti = bti; insn->bits3.gen7_oblock_rw.block_size = size == 2 ? 
2 : 3; insn->bits3.gen7_oblock_rw.header_present = 1; } #endif static void setSamplerMessage(GenEncoder *p, GenInstruction *insn, unsigned char bti, unsigned char sampler, uint32_t msg_type, uint32_t response_length, uint32_t msg_length, bool header_present, uint32_t simd_mode, uint32_t return_format) { const GenMessageTarget sfid = GEN_SFID_SAMPLER; setMessageDescriptor(p, insn, sfid, msg_length, response_length); insn->bits3.sampler_gen7.bti = bti; insn->bits3.sampler_gen7.sampler = sampler; insn->bits3.sampler_gen7.msg_type = msg_type; insn->bits3.sampler_gen7.simd_mode = simd_mode; } static void setTypedWriteMessage(GenEncoder *p, GenInstruction *insn, unsigned char bti, unsigned char msg_type, uint32_t msg_length, bool header_present) { const GenMessageTarget sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; setMessageDescriptor(p, insn, sfid, msg_length, 0, header_present); insn->bits3.gen7_typed_rw.bti = bti; insn->bits3.gen7_typed_rw.msg_type = msg_type; } static void setDWordScatterMessgae(GenEncoder *p, GenInstruction *insn, uint32_t bti, uint32_t block_size, uint32_t msg_type, uint32_t msg_length, uint32_t response_length) { const GenMessageTarget sfid = GEN6_SFID_DATAPORT_CONSTANT_CACHE; setMessageDescriptor(p, insn, sfid, msg_length, response_length); insn->bits3.gen7_dword_rw.msg_type = msg_type; insn->bits3.gen7_dword_rw.bti = bti; insn->bits3.gen7_dword_rw.block_size = block_size; insn->bits3.gen7_dword_rw.invalidate_after_read = 0; } ////////////////////////////////////////////////////////////////////////// // Gen Emitter encoding class ////////////////////////////////////////////////////////////////////////// GenEncoder::GenEncoder(uint32_t simdWidth, uint32_t gen) : stateNum(0), gen(gen) { this->curr.execWidth = simdWidth; this->curr.quarterControl = GEN_COMPRESSION_Q1; this->curr.noMask = 0; this->curr.flag = 0; this->curr.subFlag = 0; this->curr.predicate = GEN_PREDICATE_NORMAL; this->curr.inversePredicate = 0; } void GenEncoder::push(void) { 
assert(stateNum < MAX_STATE_NUM); stack[stateNum++] = curr; } void GenEncoder::pop(void) { assert(stateNum > 0); curr = stack[--stateNum]; } void GenEncoder::setHeader(GenInstruction *insn) { if (this->curr.execWidth == 8) insn->header.execution_size = GEN_WIDTH_8; else if (this->curr.execWidth == 16) insn->header.execution_size = GEN_WIDTH_16; else if (this->curr.execWidth == 1) insn->header.execution_size = GEN_WIDTH_1; else NOT_IMPLEMENTED; insn->header.acc_wr_control = this->curr.accWrEnable; insn->header.quarter_control = this->curr.quarterControl; insn->bits1.ia1.nib_ctrl = this->curr.nibControl; insn->header.mask_control = this->curr.noMask; insn->bits2.ia1.flag_reg_nr = this->curr.flag; insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag; if (this->curr.predicate != GEN_PREDICATE_NONE) { insn->header.predicate_control = this->curr.predicate; insn->header.predicate_inverse = this->curr.inversePredicate; } insn->header.saturate = this->curr.saturate; } void GenEncoder::setDst(GenInstruction *insn, GenRegister dest) { if (dest.file != GEN_ARCHITECTURE_REGISTER_FILE) assert(dest.nr < 128); insn->bits1.da1.dest_reg_file = dest.file; insn->bits1.da1.dest_reg_type = dest.type; insn->bits1.da1.dest_address_mode = dest.address_mode; insn->bits1.da1.dest_reg_nr = dest.nr; insn->bits1.da1.dest_subreg_nr = dest.subnr; if (dest.hstride == GEN_HORIZONTAL_STRIDE_0) dest.hstride = GEN_HORIZONTAL_STRIDE_1; insn->bits1.da1.dest_horiz_stride = dest.hstride; } void GenEncoder::setSrc0(GenInstruction *insn, GenRegister reg) { if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); if (reg.address_mode == GEN_ADDRESS_DIRECT) { insn->bits1.da1.src0_reg_file = reg.file; insn->bits1.da1.src0_reg_type = reg.type; insn->bits2.da1.src0_abs = reg.absolute; insn->bits2.da1.src0_negate = reg.negation; insn->bits2.da1.src0_address_mode = reg.address_mode; if (reg.file == GEN_IMMEDIATE_VALUE) { insn->bits3.ud = reg.value.ud; /* Required to set some fields in src1 as well: */ 
insn->bits1.da1.src1_reg_file = 0; /* arf */
        insn->bits1.da1.src1_reg_type = reg.type;
      }
      else {
        // Register source: the sub-register number is divided by 16 when the
        // instruction is not in align1 mode
        if (insn->header.access_mode == GEN_ALIGN_1) {
          insn->bits2.da1.src0_subreg_nr = reg.subnr;
          insn->bits2.da1.src0_reg_nr = reg.nr;
        } else {
          insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
          insn->bits2.da16.src0_reg_nr = reg.nr;
        }

        // A width-1 register in a width-1 instruction gets an all-zero region
        // (hstride 0, width 1, vstride 0); otherwise the region of the
        // register is copied as is
        if (reg.width == GEN_WIDTH_1 &&
            insn->header.execution_size == GEN_WIDTH_1) {
          insn->bits2.da1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
          insn->bits2.da1.src0_width = GEN_WIDTH_1;
          insn->bits2.da1.src0_vert_stride = GEN_VERTICAL_STRIDE_0;
        }
        else {
          insn->bits2.da1.src0_horiz_stride = reg.hstride;
          insn->bits2.da1.src0_width = reg.width;
          insn->bits2.da1.src0_vert_stride = reg.vstride;
        }
      }
    } else {
      // Non-direct addressing: only the type and the address mode come from
      // the register; file, sub-register, offset, modifiers and region are
      // fixed values
      insn->bits1.ia1.src0_reg_file = GEN_GENERAL_REGISTER_FILE;
      insn->bits1.ia1.src0_reg_type = reg.type;
      insn->bits2.ia1.src0_subreg_nr = 0;
      insn->bits2.ia1.src0_indirect_offset = 0;
      insn->bits2.ia1.src0_abs = 0;
      insn->bits2.ia1.src0_negate = 0;
      insn->bits2.ia1.src0_address_mode = reg.address_mode;
      insn->bits2.ia1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
      insn->bits2.ia1.src0_width = GEN_WIDTH_1;
      insn->bits2.ia1.src0_vert_stride = GEN_VERTICAL_STRIDE_ONE_DIMENSIONAL;
    }
  }

  /*! Encode the second source operand. Only direct addressing is supported
   *  for register sources, and source 0 must not be an immediate.
   */
  void GenEncoder::setSrc1(GenInstruction *insn, GenRegister reg) {
    assert(reg.nr < 128);
    assert(reg.file != GEN_ARCHITECTURE_REGISTER_FILE || reg.nr == 0);

    insn->bits1.da1.src1_reg_file = reg.file;
    insn->bits1.da1.src1_reg_type = reg.type;
    insn->bits3.da1.src1_abs = reg.absolute;
    insn->bits3.da1.src1_negate = reg.negation;

    assert(insn->bits1.da1.src0_reg_file != GEN_IMMEDIATE_VALUE);

    // bits3 is a union: an immediate takes the whole dword, including the
    // abs / negate bits written just above
    if (reg.file == GEN_IMMEDIATE_VALUE)
      insn->bits3.ud = reg.value.ud;
    else {
      assert (reg.address_mode == GEN_ADDRESS_DIRECT);
      if (insn->header.access_mode == GEN_ALIGN_1) {
        insn->bits3.da1.src1_subreg_nr = reg.subnr;
        insn->bits3.da1.src1_reg_nr = reg.nr;
      } else {
        insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
        insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (reg.width == GEN_WIDTH_1 &&
          insn->header.execution_size ==
GEN_WIDTH_1) {
        // Width-1 register in a width-1 instruction: all-zero region
        insn->bits3.da1.src1_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
        insn->bits3.da1.src1_width = GEN_WIDTH_1;
        insn->bits3.da1.src1_vert_stride = GEN_VERTICAL_STRIDE_0;
      } else {
        insn->bits3.da1.src1_horiz_stride = reg.hstride;
        insn->bits3.da1.src1_width = reg.width;
        insn->bits3.da1.src1_vert_stride = reg.vstride;
      }
    }
  }

  // Value of the rgba descriptor field, indexed by the number of elements
  // (0 to 4) given to UNTYPED_READ / UNTYPED_WRITE. Each additional element
  // removes one more channel bit, starting with red.
  // NOTE(review): presumably a set bit disables the channel -- confirm
  // against the data port documentation.
  static const uint32_t untypedRWMask[] = {
    GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED,
    GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN,
    GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE,
    GEN_UNTYPED_ALPHA,
    0
  };

  /*! Read 64-bit values with untyped reads, 8 channels per loop iteration.
   *  For each group the addresses from src are expanded into addr (each
   *  address followed by address + 4), a SIMD16 untyped read fills tmp, and
   *  tmp is then moved into dst as GEN_TYPE_DF.
   */
  void GenEncoder::READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum) {
    GenRegister dst32 = GenRegister::retype(dst, GEN_TYPE_UD);
    src = GenRegister::retype(src, GEN_TYPE_UD);
    addr = GenRegister::retype(addr, GEN_TYPE_UD);
    tmp = GenRegister::retype(tmp, GEN_TYPE_UD);
    // Remember the caller's state: it is overridden while the addresses are
    // built and put back before the final move of each iteration
    uint32_t originSimdWidth = curr.execWidth;
    uint32_t originPredicate = curr.predicate;
    uint32_t originMask = curr.noMask;
    push();
    for ( uint32_t channels = 0, currQuarter = GEN_COMPRESSION_Q1;
          channels < originSimdWidth; channels += 8, currQuarter++) {
      // Address computation runs unpredicated with masking disabled
      curr.predicate = GEN_PREDICATE_NONE;
      curr.noMask = GEN_MASK_DISABLE;
      curr.execWidth = 8;
      /* XXX The following instruction is illegal, but it works as SIMD 1*4 mode
         which is what we want here. */
      MOV(GenRegister::h2(addr), GenRegister::suboffset(src, channels));
      ADD(GenRegister::h2(GenRegister::suboffset(addr, 1)), GenRegister::suboffset(src, channels), GenRegister::immd(4));
      MOV(GenRegister::h2(GenRegister::suboffset(addr, 8)), GenRegister::suboffset(src, channels + 4));
      ADD(GenRegister::h2(GenRegister::suboffset(addr, 9)), GenRegister::suboffset(src, channels + 4), GenRegister::immd(4));
      // Let's use SIMD16 to read all bytes for 8 doubles data at one time.
curr.execWidth = 16; this->UNTYPED_READ(tmp, addr, bti, elemNum); if (originSimdWidth == 16) curr.quarterControl = currQuarter; curr.predicate = originPredicate; curr.noMask = originMask; // Back to simd8 for correct predication flag. curr.execWidth = 8; MOV(GenRegister::retype(GenRegister::suboffset(dst32, channels * 2), GEN_TYPE_DF), GenRegister::retype(tmp, GEN_TYPE_DF)); } pop(); } void GenEncoder::WRITE64(GenRegister msg, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar) { GenRegister data32 = GenRegister::retype(data, GEN_TYPE_UD); GenRegister unpacked; msg = GenRegister::retype(msg, GEN_TYPE_UD); int originSimdWidth = curr.execWidth; int originPredicate = curr.predicate; int originMask = curr.noMask; push(); for (uint32_t half = 0; half < 2; half++) { curr.predicate = GEN_PREDICATE_NONE; curr.noMask = GEN_MASK_DISABLE; curr.execWidth = 8; if (is_scalar) { unpacked = data32; unpacked.subnr += half * 4; } else unpacked = GenRegister::unpacked_ud(data32.nr, data32.subnr + half); MOV(GenRegister::suboffset(msg, originSimdWidth), unpacked); if (originSimdWidth == 16) { if (is_scalar) { unpacked = data32; unpacked.subnr += half * 4; } else unpacked = GenRegister::unpacked_ud(data32.nr + 2, data32.subnr + half); MOV(GenRegister::suboffset(msg, originSimdWidth + 8), unpacked); curr.execWidth = 16; } if (half == 1) ADD(GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::immd(4)); curr.predicate = originPredicate; curr.noMask = originMask; this->UNTYPED_WRITE(msg, bti, elemNum); } pop(); } void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); assert(elemNum >= 1 || elemNum <= 4); uint32_t msg_length = 0; uint32_t response_length = 0; if (this->curr.execWidth == 8) { msg_length = 1; response_length = elemNum; } else if (this->curr.execWidth == 16) { msg_length = 2; response_length = 2*elemNum; } else NOT_IMPLEMENTED; 
this->setHeader(insn); this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); setDPUntypedRW(this, insn, bti, untypedRWMask[elemNum], GEN_UNTYPED_READ, msg_length, response_length); } void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); assert(elemNum >= 1 || elemNum <= 4); uint32_t msg_length = 0; uint32_t response_length = 0; this->setHeader(insn); if (this->curr.execWidth == 8) { this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); msg_length = 1+elemNum; } else if (this->curr.execWidth == 16) { this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); msg_length = 2*(1+elemNum); } else NOT_IMPLEMENTED; this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); setDPUntypedRW(this, insn, bti, untypedRWMask[elemNum], GEN_UNTYPED_WRITE, msg_length, response_length); } void GenEncoder::BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); uint32_t msg_length = 0; uint32_t response_length = 0; if (this->curr.execWidth == 8) { msg_length = 1; response_length = 1; } else if (this->curr.execWidth == 16) { msg_length = 2; response_length = 2; } else NOT_IMPLEMENTED; this->setHeader(insn); this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); setDPByteScatterGather(this, insn, bti, elemSize, GEN_BYTE_GATHER, msg_length, response_length); } void GenEncoder::BYTE_SCATTER(GenRegister msg, uint32_t bti, uint32_t elemSize) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); uint32_t msg_length = 0; uint32_t response_length = 0; this->setHeader(insn); if (this->curr.execWidth == 8) { this->setDst(insn, GenRegister::retype(GenRegister::null(), 
GEN_TYPE_UD)); msg_length = 2; } else if (this->curr.execWidth == 16) { this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); msg_length = 4; } else NOT_IMPLEMENTED; this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); setDPByteScatterGather(this, insn, bti, elemSize, GEN_BYTE_SCATTER, msg_length, response_length); } void GenEncoder::DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); uint32_t msg_length = 0; uint32_t response_length = 0; uint32_t block_size = 0; if (this->curr.execWidth == 8) { msg_length = 1; response_length = 1; block_size = GEN_DWORD_SCATTER_8_DWORDS; } else if (this->curr.execWidth == 16) { msg_length = 2; response_length = 2; block_size = GEN_DWORD_SCATTER_16_DWORDS; } else NOT_IMPLEMENTED; this->setHeader(insn); this->setDst(insn, dst); this->setSrc0(insn, src); this->setSrc1(insn, GenRegister::immud(0)); setDWordScatterMessgae(this, insn, bti, block_size, GEN_DWORD_GATHER, msg_length, response_length); } void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); uint32_t msg_length = 0; uint32_t response_length = 0; if (this->curr.execWidth == 8) { msg_length = srcNum; response_length = 1; } else if (this->curr.execWidth == 16) { msg_length = 2*srcNum; response_length = 2; } else NOT_IMPLEMENTED; this->setHeader(insn); this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE; setMessageDescriptor(this, insn, sfid, msg_length, response_length); insn->bits3.gen7_atomic_op.msg_type = GEN_UNTYPED_ATOMIC_READ; insn->bits3.gen7_atomic_op.bti = bti; insn->bits3.gen7_atomic_op.return_data = 1; insn->bits3.gen7_atomic_op.aop_type = function; if (this->curr.execWidth == 8) 
insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD8;
    else if (this->curr.execWidth == 16)
      insn->bits3.gen7_atomic_op.simd_mode = GEN_ATOMIC_SIMD16;
    else
      NOT_SUPPORTED;
  }

  /*! Append a zero-filled instruction carrying the given opcode to the
   *  instruction store and return a pointer to the stored copy.
   *  NOTE(review): the pointer refers to an element of `store`; if `store`
   *  can reallocate, it is only safe to use until the next call -- confirm.
   */
  GenInstruction *GenEncoder::next(uint32_t opcode) {
    GenInstruction blank;
    std::memset(&blank, 0, sizeof(blank));
    blank.header.opcode = opcode;
    this->store.push_back(blank);
    return &this->store.back();
  }

  /*! Emit the same instruction twice for double operands: once on the
   *  registers as given, once on the registers shifted by half the execution
   *  width. In SIMD8 the second instruction is emitted with nibControl set.
   *  src1 is only encoded when it is not the null register.
   */
  INLINE void _handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
                            GenRegister src0, GenRegister src1 = GenRegister::null()) {
    const int width = p->curr.execWidth;
    const int half = width / 2;
    const bool hasSrc1 = !GenRegister::isNull(src1);

    p->push();

    // First half
    p->curr.nibControl = 0;
    GenInstruction *insn = p->next(opcode);
    p->setHeader(insn);
    p->setDst(insn, dst);
    p->setSrc0(insn, src0);
    if (hasSrc1)
      p->setSrc1(insn, src1);

    // Second half
    if (width == 8)
      p->curr.nibControl = 1; // second 1/8 mask
    insn = p->next(opcode);
    p->setHeader(insn);
    p->setDst(insn, GenRegister::suboffset(dst, half));
    p->setSrc0(insn, GenRegister::suboffset(src0, half));
    if (hasSrc1)
      p->setSrc1(insn, GenRegister::suboffset(src1, half));

    p->pop();
  }

  // Double register accessing is a little special,
  // Per Gen spec, the only supported mode is SIMD8 and, it only
  // handles four doubles each time.
  // We need to lower down SIMD16 to two SIMD8 and lower down SIMD8
  // to two SIMD1x4.
INLINE void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
                           GenRegister src0, GenRegister src1 = GenRegister::null()) {
    // SIMD8 goes straight to _handleDouble; SIMD16 is emitted as two SIMD8
    // passes (quarters Q1 and Q2) with every non-null operand moved on by
    // two registers for the second pass. Other widths emit nothing.
    const uint32_t width = p->curr.execWidth;
    if (width == 8) {
      _handleDouble(p, opcode, dst, src0, src1);
      return;
    }
    if (width != 16)
      return;

    p->push();
    p->curr.execWidth = 8;
    p->curr.quarterControl = GEN_COMPRESSION_Q1;
    _handleDouble(p, opcode, dst, src0, src1);

    p->curr.quarterControl = GEN_COMPRESSION_Q2;
    GenRegister upperSrc1 = src1;
    if (!GenRegister::isNull(upperSrc1))
      upperSrc1 = GenRegister::offset(upperSrc1, 2);
    _handleDouble(p, opcode, GenRegister::offset(dst, 2), GenRegister::offset(src0, 2), upperSrc1);
    p->pop();
  }

  /*! Emit a one-source instruction. Double operands go through
   *  handleDouble, 64-bit integer operands are lowered to pairs of MOVs on
   *  the bottom and top halves, and SIMD16 byte vectors are split into two
   *  SIMD8 instructions.
   *  NOTE(review): the 64-bit integer path always emits MOV, whatever the
   *  opcode is -- presumably it is only reached for moves; confirm.
   */
  INLINE void alu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src) {
    if (dst.isdf() && src.isdf()) {
      handleDouble(p, opcode, dst, src);
      return;
    }

    if (dst.isint64() && src.isint64()) {
      const int nibNum = p->curr.execWidth / 4;
      p->push();
      p->curr.execWidth = 8;
      for (int nib = 0; nib < nibNum; ++nib) {
        p->curr.chooseNib(nib);
        p->MOV(dst.bottom_half(), src.bottom_half());
        p->MOV(dst.top_half(), src.top_half());
        dst = GenRegister::suboffset(dst, 4);
        src = GenRegister::suboffset(src, 4);
      }
      p->pop();
      return;
    }

    if (needToSplitAlu1(p, dst, src) == false) {
      GenInstruction *insn = p->next(opcode);
      p->setHeader(insn);
      p->setDst(insn, dst);
      p->setSrc0(insn, src);
      return;
    }

    // Split case: one SIMD8 instruction per quarter
    for (int quarter = 0; quarter < 2; ++quarter) {
      const bool second = quarter == 1;
      GenInstruction *insn = p->next(opcode);
      p->setHeader(insn);
      insn->header.quarter_control = second ? GEN_COMPRESSION_Q2 : GEN_COMPRESSION_Q1;
      insn->header.execution_size = GEN_WIDTH_8;
      p->setDst(insn, second ? GenRegister::Qn(dst, 1) : dst);
      p->setSrc0(insn, second ? GenRegister::Qn(src, 1) : src);
    }
  }

  INLINE void alu2(GenEncoder *p,
                   uint32_t opcode,
                   GenRegister dst,
                   GenRegister src0,
                   GenRegister src1)
  {
    if (dst.isdf() && src0.isdf() && src1.isdf()) {
      handleDouble(p, opcode, dst,
src0, src1); } else if (needToSplitAlu2(p, dst, src0, src1) == false) { GenInstruction *insn = p->next(opcode); p->setHeader(insn); p->setDst(insn, dst); p->setSrc0(insn, src0); p->setSrc1(insn, src1); } else { GenInstruction *insnQ1, *insnQ2; // Instruction for the first quarter insnQ1 = p->next(opcode); p->setHeader(insnQ1); insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; insnQ1->header.execution_size = GEN_WIDTH_8; p->setDst(insnQ1, dst); p->setSrc0(insnQ1, src0); p->setSrc1(insnQ1, src1); // Instruction for the second quarter insnQ2 = p->next(opcode); p->setHeader(insnQ2); insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; insnQ2->header.execution_size = GEN_WIDTH_8; p->setDst(insnQ2, GenRegister::Qn(dst, 1)); p->setSrc0(insnQ2, GenRegister::Qn(src0, 1)); p->setSrc1(insnQ2, GenRegister::Qn(src1, 1)); } } #define NO_SWIZZLE ((0<<0) | (1<<2) | (2<<4) | (3<<6)) static GenInstruction *alu3(GenEncoder *p, uint32_t opcode, GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2) { GenInstruction *insn = p->next(opcode); assert(dest.file == GEN_GENERAL_REGISTER_FILE); assert(dest.nr < 128); assert(dest.address_mode == GEN_ADDRESS_DIRECT); assert(dest.type = GEN_TYPE_F); insn->bits1.da3src.dest_reg_file = 0; insn->bits1.da3src.dest_reg_nr = dest.nr; insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16; insn->bits1.da3src.dest_writemask = 0xf; p->setHeader(insn); insn->header.access_mode = GEN_ALIGN_16; insn->header.execution_size = GEN_WIDTH_8; assert(src0.file == GEN_GENERAL_REGISTER_FILE); assert(src0.address_mode == GEN_ADDRESS_DIRECT); assert(src0.nr < 128); assert(src0.type == GEN_TYPE_F); insn->bits2.da3src.src0_swizzle = NO_SWIZZLE; insn->bits2.da3src.src0_subreg_nr = src0.subnr / 4 ; insn->bits2.da3src.src0_reg_nr = src0.nr; insn->bits1.da3src.src0_abs = src0.absolute; insn->bits1.da3src.src0_negate = src0.negation; insn->bits2.da3src.src0_rep_ctrl = src0.vstride == GEN_VERTICAL_STRIDE_0; assert(src1.file == GEN_GENERAL_REGISTER_FILE); 
assert(src1.address_mode == GEN_ADDRESS_DIRECT); assert(src1.nr < 128); assert(src1.type == GEN_TYPE_F); insn->bits2.da3src.src1_swizzle = NO_SWIZZLE; insn->bits2.da3src.src1_subreg_nr_low = (src1.subnr / 4) & 0x3; insn->bits3.da3src.src1_subreg_nr_high = (src1.subnr / 4) >> 2; insn->bits2.da3src.src1_rep_ctrl = src1.vstride == GEN_VERTICAL_STRIDE_0; insn->bits3.da3src.src1_reg_nr = src1.nr; insn->bits1.da3src.src1_abs = src1.absolute; insn->bits1.da3src.src1_negate = src1.negation; assert(src2.file == GEN_GENERAL_REGISTER_FILE); assert(src2.address_mode == GEN_ADDRESS_DIRECT); assert(src2.nr < 128); assert(src2.type == GEN_TYPE_F); insn->bits3.da3src.src2_swizzle = NO_SWIZZLE; insn->bits3.da3src.src2_subreg_nr = src2.subnr / 4; insn->bits3.da3src.src2_rep_ctrl = src2.vstride == GEN_VERTICAL_STRIDE_0; insn->bits3.da3src.src2_reg_nr = src2.nr; insn->bits1.da3src.src2_abs = src2.absolute; insn->bits1.da3src.src2_negate = src2.negation; // Emit second half of the instruction if (p->curr.execWidth == 16) { GenInstruction q1Insn = *insn; insn = p->next(opcode); *insn = q1Insn; insn->header.quarter_control = GEN_COMPRESSION_Q2; insn->bits1.da3src.dest_reg_nr++; if (insn->bits2.da3src.src0_rep_ctrl == 0) insn->bits2.da3src.src0_reg_nr++; if (insn->bits2.da3src.src1_rep_ctrl == 0) insn->bits3.da3src.src1_reg_nr++; if (insn->bits3.da3src.src2_rep_ctrl == 0) insn->bits3.da3src.src2_reg_nr++; } return insn; } #undef NO_SWIZZLE #define ALU1(OP) \ void GenEncoder::OP(GenRegister dest, GenRegister src0) { \ alu1(this, GEN_OPCODE_##OP, dest, src0); \ } #define ALU2(OP) \ void GenEncoder::OP(GenRegister dest, GenRegister src0, GenRegister src1) { \ alu2(this, GEN_OPCODE_##OP, dest, src0, src1); \ } #define ALU3(OP) \ void GenEncoder::OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2) { \ alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \ } void GenEncoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value) { union { double d; unsigned u[2]; } 
u; u.d = value; GenRegister r = GenRegister::retype(tmp, GEN_TYPE_UD); push(); curr.predicate = GEN_PREDICATE_NONE; curr.execWidth = 1; MOV(r, GenRegister::immud(u.u[1])); MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[0])); pop(); r.type = GEN_TYPE_DF; r.vstride = GEN_VERTICAL_STRIDE_0; r.width = GEN_WIDTH_1; r.hstride = GEN_HORIZONTAL_STRIDE_0; push(); uint32_t width = curr.execWidth; curr.execWidth = 8; curr.predicate = GEN_PREDICATE_NONE; curr.noMask = 1; curr.quarterControl = GEN_COMPRESSION_Q1; MOV(dest, r); if (width == 16) { curr.quarterControl = GEN_COMPRESSION_Q2; MOV(GenRegister::offset(dest, 2), r); } pop(); } void GenEncoder::UPSAMPLE_SHORT(GenRegister dest, GenRegister src0, GenRegister src1) { dest.type = GEN_TYPE_B; dest.hstride = GEN_HORIZONTAL_STRIDE_2; src0.type = GEN_TYPE_B; src0.hstride = GEN_HORIZONTAL_STRIDE_2; src1.type = GEN_TYPE_B; src1.hstride = GEN_HORIZONTAL_STRIDE_2; MOV(dest, src1); dest.subnr ++; MOV(dest, src0); } void GenEncoder::UPSAMPLE_INT(GenRegister dest, GenRegister src0, GenRegister src1) { dest.type = GEN_TYPE_W; dest.hstride = GEN_HORIZONTAL_STRIDE_2; src0.type = GEN_TYPE_W; src0.hstride = GEN_HORIZONTAL_STRIDE_2; src1.type = GEN_TYPE_W; src1.hstride = GEN_HORIZONTAL_STRIDE_2; MOV(dest, src1); dest.subnr += 2; MOV(dest, src0); } void GenEncoder::LOAD_INT64_IMM(GenRegister dest, int64_t value) { GenRegister u0 = GenRegister::immd((int)value), u1 = GenRegister::immd(value >> 32); int execWidth = curr.execWidth; push(); curr.execWidth = 8; for(int nib = 0; nib < execWidth/4; nib ++) { curr.chooseNib(nib); MOV(dest.top_half(), u1); MOV(dest.bottom_half(), u0); dest = GenRegister::suboffset(dest, 4); } pop(); } void GenEncoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister r) { int w = curr.execWidth; if (src0.isdf()) { GBE_ASSERT(0); // MOV DF is called from convert instruction, // We should never convert a df to a df. 
} else { GenRegister r0 = GenRegister::h2(r); push(); curr.execWidth = 8; curr.predicate = GEN_PREDICATE_NONE; MOV(r0, src0); MOV(GenRegister::suboffset(r0, 8), GenRegister::suboffset(src0, 4)); curr.predicate = GEN_PREDICATE_NORMAL; curr.quarterControl = 0; curr.nibControl = 0; MOV(dest, r); curr.nibControl = 1; MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r, 8)); pop(); if (w == 16) { push(); curr.execWidth = 8; curr.predicate = GEN_PREDICATE_NONE; MOV(r0, GenRegister::suboffset(src0, 8)); MOV(GenRegister::suboffset(r0, 8), GenRegister::suboffset(src0, 12)); curr.predicate = GEN_PREDICATE_NORMAL; curr.quarterControl = 1; curr.nibControl = 0; MOV(GenRegister::suboffset(dest, 8), r); curr.nibControl = 1; MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r, 8)); pop(); } } } ALU1(MOV) ALU1(RNDZ) ALU1(RNDE) ALU1(RNDD) ALU1(RNDU) ALU1(FBH) ALU1(FBL) ALU2(SEL) ALU1(NOT) ALU2(AND) ALU2(OR) ALU2(XOR) ALU2(SHR) ALU2(SHL) ALU2(RSR) ALU2(RSL) ALU2(ASR) ALU1(FRC) ALU2(MAC) ALU1(LZD) ALU2(LINE) ALU2(PLN) ALU2(MACH) ALU3(MAD) void GenEncoder::SUBB(GenRegister dest, GenRegister src0, GenRegister src1) { push(); curr.accWrEnable = 1; alu2(this, GEN_OPCODE_SUBB, dest, src0, src1); pop(); } void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) { push(); curr.accWrEnable = 1; alu2(this, GEN_OPCODE_ADDC, dest, src0, src1); pop(); } void GenEncoder::ADD(GenRegister dest, GenRegister src0, GenRegister src1) { if (src0.type == GEN_TYPE_F || (src0.file == GEN_IMMEDIATE_VALUE && src0.type == GEN_TYPE_VF)) { assert(src1.type != GEN_TYPE_UD); assert(src1.type != GEN_TYPE_D); } if (src1.type == GEN_TYPE_F || (src1.file == GEN_IMMEDIATE_VALUE && src1.type == GEN_TYPE_VF)) { assert(src0.type != GEN_TYPE_UD); assert(src0.type != GEN_TYPE_D); } alu2(this, GEN_OPCODE_ADD, dest, src0, src1); } void GenEncoder::MUL(GenRegister dest, GenRegister src0, GenRegister src1) { if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src1.type == 
GEN_TYPE_D || src1.type == GEN_TYPE_UD) assert(dest.type != GEN_TYPE_F); if (src0.type == GEN_TYPE_F || (src0.file == GEN_IMMEDIATE_VALUE && src0.type == GEN_TYPE_VF)) { assert(src1.type != GEN_TYPE_UD); assert(src1.type != GEN_TYPE_D); } if (src1.type == GEN_TYPE_F || (src1.file == GEN_IMMEDIATE_VALUE && src1.type == GEN_TYPE_VF)) { assert(src0.type != GEN_TYPE_UD); assert(src0.type != GEN_TYPE_D); } assert(src0.file != GEN_ARCHITECTURE_REGISTER_FILE || src0.nr != GEN_ARF_ACCUMULATOR); assert(src1.file != GEN_ARCHITECTURE_REGISTER_FILE || src1.nr != GEN_ARF_ACCUMULATOR); alu2(this, GEN_OPCODE_MUL, dest, src0, src1); } void GenEncoder::NOP(void) { GenInstruction *insn = this->next(GEN_OPCODE_NOP); this->setDst(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD)); this->setSrc0(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD)); this->setSrc1(insn, GenRegister::immud(0x0)); } void GenEncoder::BARRIER(GenRegister src) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); this->setDst(insn, GenRegister::null()); this->setSrc0(insn, src); setMessageDescriptor(this, insn, GEN_SFID_MESSAGE_GATEWAY, 1, 0); insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG; insn->bits3.msg_gateway.notify = 0x1; } void GenEncoder::FENCE(GenRegister dst) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); this->setDst(insn, dst); this->setSrc0(insn, dst); setMessageDescriptor(this, insn, GEN_SFID_DATAPORT_DATA_CACHE, 1, 1, 1); insn->bits3.gen7_memory_fence.msg_type = GEN_MEM_FENCE; insn->bits3.gen7_memory_fence.commit_enable = 0x1; } void GenEncoder::JMPI(GenRegister src) { alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src); } void GenEncoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) { GenInstruction &insn = this->store[insnID]; assert(insnID < this->store.size()); assert(insn.header.opcode == GEN_OPCODE_JMPI); this->setSrc1(&insn, GenRegister::immd(jumpDistance)); } void 
GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1) { if (needToSplitCmp(this, src0, src1) == false) { GenInstruction *insn = this->next(GEN_OPCODE_CMP); this->setHeader(insn); insn->header.destreg_or_condmod = conditional; this->setDst(insn, GenRegister::null()); this->setSrc0(insn, src0); this->setSrc1(insn, src1); } else { GenInstruction *insnQ1, *insnQ2; // Instruction for the first quarter insnQ1 = this->next(GEN_OPCODE_CMP); this->setHeader(insnQ1); insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; insnQ1->header.execution_size = GEN_WIDTH_8; insnQ1->header.destreg_or_condmod = conditional; this->setDst(insnQ1, GenRegister::null()); this->setSrc0(insnQ1, src0); this->setSrc1(insnQ1, src1); // Instruction for the second quarter insnQ2 = this->next(GEN_OPCODE_CMP); this->setHeader(insnQ2); insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; insnQ2->header.execution_size = GEN_WIDTH_8; insnQ2->header.destreg_or_condmod = conditional; this->setDst(insnQ2, GenRegister::null()); this->setSrc0(insnQ2, GenRegister::Qn(src0, 1)); this->setSrc1(insnQ2, GenRegister::Qn(src1, 1)); } } void GenEncoder::SEL_CMP(uint32_t conditional, GenRegister dst, GenRegister src0, GenRegister src1) { GenInstruction *insn = this->next(GEN_OPCODE_SEL); GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE); this->setHeader(insn); insn->header.destreg_or_condmod = conditional; this->setDst(insn, dst); this->setSrc0(insn, src0); this->setSrc1(insn, src1); } void GenEncoder::WAIT(void) { GenInstruction *insn = this->next(GEN_OPCODE_WAIT); GenRegister src = GenRegister::notification1(); this->setDst(insn, GenRegister::null()); this->setSrc0(insn, src); this->setSrc1(insn, GenRegister::null()); insn->header.execution_size = 0; /* must */ insn->header.predicate_control = 0; insn->header.quarter_control = 0; } void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1) { GenInstruction *insn = this->next(GEN_OPCODE_MATH); assert(dst.file == 
GEN_GENERAL_REGISTER_FILE); assert(src0.file == GEN_GENERAL_REGISTER_FILE); assert(src1.file == GEN_GENERAL_REGISTER_FILE); assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1); if (function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT || function == GEN_MATH_FUNCTION_INT_DIV_REMAINDER || function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { assert(src0.type != GEN_TYPE_F); assert(src1.type != GEN_TYPE_F); } else { assert(src0.type == GEN_TYPE_F); assert(src1.type == GEN_TYPE_F); } insn->header.destreg_or_condmod = function; this->setHeader(insn); this->setDst(insn, dst); this->setSrc0(insn, src0); this->setSrc1(insn, src1); if (function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT || function == GEN_MATH_FUNCTION_INT_DIV_REMAINDER) { insn->header.execution_size = GEN_WIDTH_8; insn->header.quarter_control = GEN_COMPRESSION_Q1; if(this->curr.execWidth == 16) { GenInstruction *insn2 = this->next(GEN_OPCODE_MATH); GenRegister new_dest, new_src0, new_src1; new_dest = GenRegister::QnPhysical(dst, 1); new_src0 = GenRegister::QnPhysical(src0, 1); new_src1 = GenRegister::QnPhysical(src1, 1); insn2->header.destreg_or_condmod = function; this->setHeader(insn2); insn2->header.execution_size = GEN_WIDTH_8; insn2->header.quarter_control = GEN_COMPRESSION_Q2; this->setDst(insn2, new_dest); this->setSrc0(insn2, new_src0); this->setSrc1(insn2, new_src1); } } } void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src) { GenInstruction *insn = this->next(GEN_OPCODE_MATH); assert(dst.file == GEN_GENERAL_REGISTER_FILE); assert(src.file == GEN_GENERAL_REGISTER_FILE); assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1); assert(src.type == GEN_TYPE_F); insn->header.destreg_or_condmod = function; this->setHeader(insn); this->setDst(insn, dst); this->setSrc0(insn, src); } void GenEncoder::SAMPLE(GenRegister dest, GenRegister msg, bool header_present, unsigned char bti, unsigned char sampler, unsigned int coord_cnt, uint32_t simdWidth, uint32_t writemask, uint32_t return_format) { if 
(writemask == 0) return; uint32_t msg_type = (simdWidth == 16) ? GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE : GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE; uint32_t response_length = (4 * (simdWidth / 8)); uint32_t msg_length = (coord_cnt * (simdWidth / 8)); if (header_present) msg_length++; uint32_t simd_mode = (simdWidth == 16) ? GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8; GenInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); this->setDst(insn, dest); this->setSrc0(insn, msg); setSamplerMessage(this, insn, bti, sampler, msg_type, response_length, msg_length, header_present, simd_mode, return_format); } void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); uint32_t msg_type = GEN_TYPED_WRITE; uint32_t msg_length = header_present ? 9 : 8; this->setHeader(insn); this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); this->setSrc0(insn, msg); setTypedWriteMessage(this, insn, bti, msg_type, msg_length, header_present); } static void setScratchMessage(GenEncoder *p, GenInstruction *insn, uint32_t offset, uint32_t block_size, uint32_t channel_mode, uint32_t msg_type, uint32_t msg_length, uint32_t response_length) { const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE; setMessageDescriptor(p, insn, sfid, msg_length, response_length, true); insn->bits3.gen7_scratch_rw.block_size = block_size; insn->bits3.gen7_scratch_rw.msg_type = msg_type; insn->bits3.gen7_scratch_rw.channel_mode = channel_mode; insn->bits3.gen7_scratch_rw.offset = offset; insn->bits3.gen7_scratch_rw.category = 1; } void GenEncoder::SCRATCH_WRITE(GenRegister msg, uint32_t offset, uint32_t size, uint32_t src_num, uint32_t channel_mode) { assert(src_num == 1 || src_num ==2); uint32_t block_size = src_num == 1 ? 
GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2; GenInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); this->setSrc0(insn, msg); this->setSrc1(insn, GenRegister::immud(0)); // here src_num means register that will be write out: in terms of 32byte register number setScratchMessage(this, insn, offset, block_size, channel_mode, GEN_SCRATCH_WRITE, src_num+1, 0); } void GenEncoder::SCRATCH_READ(GenRegister dst, GenRegister src, uint32_t offset, uint32_t size, uint32_t dst_num, uint32_t channel_mode) { assert(dst_num == 1 || dst_num ==2); uint32_t block_size = dst_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2; GenInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); this->setDst(insn, dst); this->setSrc0(insn, src); this->setSrc1(insn, GenRegister::immud(0)); // here dst_num is the register that will be write-back: in terms of 32byte register setScratchMessage(this, insn, offset, block_size, channel_mode, GEN_SCRATCH_READ, 1, dst_num); } void GenEncoder::EOT(uint32_t msg) { GenInstruction *insn = this->next(GEN_OPCODE_SEND); this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); this->setSrc0(insn, GenRegister::ud8grf(msg,0)); this->setSrc1(insn, GenRegister::immud(0)); insn->header.execution_size = GEN_WIDTH_8; insn->bits3.spawner_gen5.resource = GEN_DO_NOT_DEREFERENCE_URB; insn->bits3.spawner_gen5.msg_length = 1; insn->bits3.spawner_gen5.end_of_thread = 1; insn->header.destreg_or_condmod = GEN_SFID_THREAD_SPAWNER; } } /* namespace gbe */ Release_v0.3/backend/src/backend/gen_encoder.hpp000066400000000000000000000175071223142177000217430ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, 
or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* Copyright (C) Intel Corp. 2006. All Rights Reserved. Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to develop this 3D driver. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. **********************************************************************/ /* * Authors: * Keith Whitwell */ #ifndef __GBE_GEN_ENCODER_HPP__ #define __GBE_GEN_ENCODER_HPP__ #include "backend/gen_defs.hpp" #include "backend/gen_register.hpp" #include "sys/platform.hpp" #include "sys/vector.hpp" #include namespace gbe { /*! 
Helper structure to emit Gen instructions */ class GenEncoder { public: /*! simdWidth is the default width for the instructions */ GenEncoder(uint32_t simdWidth, uint32_t gen); /*! Size of the stack (should be large enough) */ enum { MAX_STATE_NUM = 16 }; /*! Push the current instruction state */ void push(void); /*! Pop the latest pushed state */ void pop(void); /*! The instruction stream we are building */ vector store; /*! Current instruction state to use */ GenInstructionState curr; /*! State used to encode the instructions */ GenInstructionState stack[MAX_STATE_NUM]; /*! Number of states currently pushed */ uint32_t stateNum; /*! Gen generation to encode */ uint32_t gen; //////////////////////////////////////////////////////////////////////// // Encoding functions //////////////////////////////////////////////////////////////////////// #define ALU1(OP) void OP(GenRegister dest, GenRegister src0); #define ALU2(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1); #define ALU3(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2); ALU1(MOV) ALU1(FBH) ALU1(FBL) ALU2(SUBB) ALU2(UPSAMPLE_SHORT) ALU2(UPSAMPLE_INT) ALU1(RNDZ) ALU1(RNDE) ALU1(RNDD) ALU1(RNDU) ALU2(SEL) ALU1(NOT) ALU2(AND) ALU2(OR) ALU2(XOR) ALU2(SHR) ALU2(SHL) ALU2(RSR) ALU2(RSL) ALU2(ASR) ALU2(ADD) ALU2(ADDC) ALU2(MUL) ALU1(FRC) ALU2(MAC) ALU2(MACH) ALU1(LZD) ALU2(LINE) ALU2(PLN) ALU3(MAD) //ALU2(MOV_DF); #undef ALU1 #undef ALU2 #undef ALU3 void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp = GenRegister::null()); void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value); void LOAD_INT64_IMM(GenRegister dest, int64_t value); /*! Barrier message (to synchronize threads of a workgroup) */ void BARRIER(GenRegister src); /*! Memory fence message (to order loads and stores between threads) */ void FENCE(GenRegister dst); /*! Jump indexed instruction */ void JMPI(GenRegister src); /*! 
Compare instructions */ void CMP(uint32_t conditional, GenRegister src0, GenRegister src1); /*! Select with embedded compare (like sel.le ...) */ void SEL_CMP(uint32_t conditional, GenRegister dst, GenRegister src0, GenRegister src1); /*! EOT is used to finish GPGPU threads */ void EOT(uint32_t msg_nr); /*! No-op */ void NOP(void); /*! Wait instruction (used for the barrier) */ void WAIT(void); /*! Atomic instructions */ void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum); /*! Read 64-bits float/int arrays */ void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum); /*! Write 64-bits float/int arrays */ void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar); /*! Untyped read (upto 4 channels) */ void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum); /*! Untyped write (upto 4 channels) */ void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum); /*! Byte gather (for unaligned bytes, shorts and ints) */ void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize); /*! Byte scatter (for unaligned bytes, shorts and ints) */ void BYTE_SCATTER(GenRegister src, uint32_t bti, uint32_t elemSize); /*! DWord gather (for constant cache read) */ void DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti); /*! for scratch memory read */ void SCRATCH_READ(GenRegister msg, GenRegister dst, uint32_t offset, uint32_t size, uint32_t dst_num, uint32_t channel_mode); /*! for scratch memory write */ void SCRATCH_WRITE(GenRegister msg, uint32_t offset, uint32_t size, uint32_t src_num, uint32_t channel_mode); /*! Send instruction for the sampler */ void SAMPLE(GenRegister dest, GenRegister msg, bool header_present, unsigned char bti, unsigned char sampler, unsigned int coord_cnt, unsigned int simdWidth, uint32_t writemask, uint32_t return_format); /*! 
TypedWrite instruction for texture */ void TYPED_WRITE(GenRegister header, bool header_present, unsigned char bti); /*! Extended math function (2 sources) */ void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1); /*! Extended math function (1 source) */ void MATH(GenRegister dst, uint32_t function, GenRegister src); /*! Patch JMPI (located at index insnID) with the given jump distance */ void patchJMPI(uint32_t insnID, int32_t jumpDistance); //////////////////////////////////////////////////////////////////////// // Helper functions to encode //////////////////////////////////////////////////////////////////////// void setHeader(GenInstruction *insn); void setDst(GenInstruction *insn, GenRegister dest); void setSrc0(GenInstruction *insn, GenRegister reg); void setSrc1(GenInstruction *insn, GenRegister reg); GenInstruction *next(uint32_t opcode); uint32_t n_instruction(void) const { return store.size(); } GBE_CLASS(GenEncoder); //!< Use custom allocators }; } /* namespace gbe */ #endif /* __GBE_GEN_ENCODER_HPP__ */ Release_v0.3/backend/src/backend/gen_insn_gen7_schedule_info.hxx000066400000000000000000000045371223142177000251210ustar00rootroot00000000000000// Family Latency SIMD16 SIMD8 DECL_GEN7_SCHEDULE(Label, 0, 0, 0) DECL_GEN7_SCHEDULE(Unary, 20, 4, 2) DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 4, 2) DECL_GEN7_SCHEDULE(Binary, 20, 4, 2) DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2) DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2) DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64RHADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2) DECL_GEN7_SCHEDULE(I64MULHI, 20, 4, 2) DECL_GEN7_SCHEDULE(I64MADSAT, 20, 4, 2) DECL_GEN7_SCHEDULE(Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(I64DIVREM, 20, 4, 2) DECL_GEN7_SCHEDULE(Jump, 14, 1, 1) DECL_GEN7_SCHEDULE(IndirectMove, 20, 2, 2) DECL_GEN7_SCHEDULE(Eot, 20, 1, 1) DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2) DECL_GEN7_SCHEDULE(Wait, 20, 
2, 2) DECL_GEN7_SCHEDULE(Math, 20, 4, 2) DECL_GEN7_SCHEDULE(Barrier, 80, 1, 1) DECL_GEN7_SCHEDULE(Fence, 80, 1, 1) DECL_GEN7_SCHEDULE(Read64, 80, 1, 1) DECL_GEN7_SCHEDULE(Write64, 80, 1, 1) DECL_GEN7_SCHEDULE(UntypedRead, 80, 1, 1) DECL_GEN7_SCHEDULE(UntypedWrite, 80, 1, 1) DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1) DECL_GEN7_SCHEDULE(ByteScatter, 80, 1, 1) DECL_GEN7_SCHEDULE(DWordGather, 80, 1, 1) DECL_GEN7_SCHEDULE(Sample, 80, 1, 1) DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1) DECL_GEN7_SCHEDULE(SpillReg, 80, 1, 1) DECL_GEN7_SCHEDULE(UnSpillReg, 80, 1, 1) DECL_GEN7_SCHEDULE(GetImageInfo, 20, 4, 2) DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1) DECL_GEN7_SCHEDULE(I64MUL, 20, 4, 2) DECL_GEN7_SCHEDULE(I64SATADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64SATSUB, 20, 4, 2) Release_v0.3/backend/src/backend/gen_insn_scheduling.cpp000066400000000000000000000564101223142177000234670ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_insn_scheduling.cpp * \author Benjamin Segovia */ /* * Overall idea: * ============= * * This is the instruction scheduling part of the code. With Gen, we actually * have a simple strategy to follow. Indeed, here are the constraints: * * 1 - the number of registers per HW thread is constant and given (128 32 bytes * GRF per thread). 
So, we can use all these registers with no penalty * 2 - spilling is super bad. Instruction latency matters but the top priority * is to avoid as much as possible spilling * * * We schedule twice using at each time a local forward list scheduler * * Before the register allocation * ============================== * * We try to limit the register pressure. * Well, this is a hard problem and we have a decent strategy now that we called * "zero cycled LIFO scheduling". * We use a local forward list scheduling and we schedule the instructions in a * LIFO order i.e. as a stack. Basically, we take the most recent instruction * and schedule it right away. Obviously we ignore completely the real latencies * and throuputs and just simulate instructions that are issued and completed in * zero cycle. For the complex kernels we already have (like menger sponge), * this provides a pretty good strategy enabling SIMD16 code generation where * when scheduling is deactivated, even SIMD8 fails * * One may argue that this strategy is bad, latency wise. This is not true since * the register allocator will anyway try to burn as many registers as possible. * So, there is still opportunities to schedule after register allocation. * * Our idea seems to work decently. There is however a strong research article * that is able to near-optimally reschudle the instructions to minimize * register use. This is: * * "Minimum Register Instruction Sequence Problem: Revisiting Optimal Code * Generation for DAGs" * * After the register allocation * ============================== * * This is here a pretty simple strategy based on a regular forward list * scheduling. Since Gen is a co-issue based machine, this is useless to take * into account really precise timings since instruction issues will happen * out-of-order based on other thread executions. * * Note that we over-simplify the problem. 
Indeed, Gen register file is flexible * and we are able to use sub-registers of GRF in particular when we handle * uniforms or mask registers which are spilled in GRFs. Thing is that two * uniforms may not interfere even if they belong to the same GRF (i.e. they use * two different sub-registers). This means that the interference relation is * not transitive for Gen. To simplify everything, we just take consider full * GRFs (in SIMD8) or double full GRFs (in SIMD16) regardless of the fact this * is a uniform, a mask or a regular GRF. * * Obviously, this leads to extra dependencies in the code. */ #include "backend/gen_insn_selection.hpp" #include "backend/gen_reg_allocation.hpp" #include "sys/cvar.hpp" #include "sys/intrusive_list.hpp" namespace gbe { // Helper structure to schedule the basic blocks struct SelectionScheduler; // Node for the schedule DAG struct ScheduleDAGNode; /*! We need to chain together the node we point */ struct ScheduleListNode : public intrusive_list_node { INLINE ScheduleListNode(ScheduleDAGNode *node) : node(node) {} ScheduleDAGNode *node; }; /*! Node of the DAG */ struct ScheduleDAGNode { INLINE ScheduleDAGNode(SelectionInstruction &insn) : insn(insn), refNum(0), retiredCycle(0) {} bool dependsOn(ScheduleDAGNode *node) const { GBE_ASSERT(node != NULL); for (auto child : node->children) if (child.node == this) return true; return false; } /*! Children that depends on us */ intrusive_list children; /*! Instruction after code selection */ SelectionInstruction &insn; /*! Number of nodes that point to us (i.e. nodes we depend on) */ uint32_t refNum; /*! Cycle when the instruction is retired */ uint32_t retiredCycle; }; /*! To track loads and stores */ enum GenMemory : uint8_t { GLOBAL_MEMORY = 0, LOCAL_MEMORY, MAX_MEM_SYSTEM }; /*! Do we allocate after or before the register allocation? */ enum SchedulePolicy { PRE_ALLOC = 0, // LIFO scheduling (tends to limit register pressure) POST_ALLOC // FIFO scheduling (limits latency problems) }; /*! 
Helper structure to handle dependencies while scheduling. Takes into * account virtual and physical registers and memory sub-systems */ struct DependencyTracker : public NonCopyable { DependencyTracker(const Selection &selection, SelectionScheduler &scheduler); /*! Reset it before scheduling a new block */ void clear(void); /*! Get an index in the node array for the given register */ uint32_t getIndex(GenRegister reg) const; /*! Get an index in the node array for the given memory system */ uint32_t getIndex(uint32_t bti) const; /*! Add a new dependency "node0 depends on node1" */ void addDependency(ScheduleDAGNode *node0, ScheduleDAGNode *node1); /*! Add a new dependency "node0 depends on node located at index" */ void addDependency(ScheduleDAGNode *node0, uint32_t index); /*! Add a new dependency "node located at index depends on node0" */ void addDependency(uint32_t index, ScheduleDAGNode *node0); /*! No dependency for null registers and immediate */ INLINE bool ignoreDependency(GenRegister reg) const { if (reg.file == GEN_IMMEDIATE_VALUE) return true; else if (reg.file == GEN_ARCHITECTURE_REGISTER_FILE) { if ((reg.nr & 0xf0) == GEN_ARF_NULL) return true; } return false; } /*! Owns the tracker */ SelectionScheduler &scheduler; /*! Add a new dependency "node0 depends on node set for register reg" */ INLINE void addDependency(ScheduleDAGNode *node0, GenRegister reg) { if (this->ignoreDependency(reg) == false) { const uint32_t index = this->getIndex(reg); this->addDependency(node0, index); if (reg.isdf() || reg.isint64()) this->addDependency(node0, index + 1); } } /*! Add a new dependency "node set for register reg depends on node0" */ INLINE void addDependency(GenRegister reg, ScheduleDAGNode *node0) { if (this->ignoreDependency(reg) == false) { const uint32_t index = this->getIndex(reg); this->addDependency(index, node0); if (reg.isdf() || reg.isint64()) this->addDependency(index + 1, node0); } } /*! 
Make the node located at insnID a barrier */ void makeBarrier(int32_t insnID, int32_t insnNum); /*! Update all the writes (memory, predicates, registers) */ void updateWrites(ScheduleDAGNode *node); /*! Maximum number of *physical* flag registers */ static const uint32_t MAX_FLAG_REGISTER = 8u; /*! Maximum number of *physical* accumulators registers */ static const uint32_t MAX_ACC_REGISTER = 1u; /*! Stores the last node that wrote to a register / memory ... */ vector nodes; /*! Stores the nodes per instruction */ vector insnNodes; /*! Number of virtual register in the selection */ uint32_t grfNum; }; /*! Perform the instruction scheduling */ struct SelectionScheduler : public NonCopyable { /*! Init the book keeping structures */ SelectionScheduler(GenContext &ctx, Selection &selection, SchedulePolicy policy); /*! Make all lists empty */ void clearLists(void); /*! Return the number of instructions to schedule in the DAG */ int32_t buildDAG(SelectionBlock &bb); /*! Schedule the DAG */ void scheduleDAG(SelectionBlock &bb, int32_t insnNum); /*! To limit register pressure or limit insn latency problems */ SchedulePolicy policy; /*! Make ScheduleListNode allocation faster */ DECL_POOL(ScheduleListNode, listPool); /*! Make ScheduleDAGNode allocation faster */ DECL_POOL(ScheduleDAGNode, nodePool); /*! Ready list is instructions that can be scheduled */ intrusive_list ready; /*! Active list is instructions that are executing */ intrusive_list active; /*! Handle complete compilation */ GenContext &ctx; /*! Code to schedule */ Selection &selection; /*! 
To help tracking dependencies */ DependencyTracker tracker; }; DependencyTracker::DependencyTracker(const Selection &selection, SelectionScheduler &scheduler) : scheduler(scheduler) { if (scheduler.policy == PRE_ALLOC) { this->grfNum = selection.getRegNum(); nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM); } else { const uint32_t simdWidth = scheduler.ctx.getSimdWidth(); GBE_ASSERT(simdWidth == 8 || simdWidth == 16); this->grfNum = simdWidth == 8 ? 128 : 64; nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM); } insnNodes.resize(selection.getLargestBlockSize()); } void DependencyTracker::clear(void) { for (auto &x : nodes) x = NULL; } void DependencyTracker::addDependency(ScheduleDAGNode *node0, ScheduleDAGNode *node1) { if (node0 != NULL && node1 != NULL && node0 != node1 && node0->dependsOn(node1) == false) { ScheduleListNode *dep = scheduler.newScheduleListNode(node0); node0->refNum++; node1->children.push_back(dep); } } void DependencyTracker::addDependency(ScheduleDAGNode *node, uint32_t index) { this->addDependency(node, this->nodes[index]); } void DependencyTracker::addDependency(uint32_t index, ScheduleDAGNode *node) { this->addDependency(this->nodes[index], node); } void DependencyTracker::makeBarrier(int32_t barrierID, int32_t insnNum) { ScheduleDAGNode *barrier = this->insnNodes[barrierID]; // The barrier depends on all nodes before it for (int32_t insnID = 0; insnID < barrierID; ++insnID) this->addDependency(barrier, this->insnNodes[insnID]); // All nodes after barriers depend on the barrier for (int32_t insnID = barrierID + 1; insnID < insnNum; ++insnID) this->addDependency(this->insnNodes[insnID], barrier); } static GenRegister getFlag(const SelectionInstruction &insn) { if (insn.state.physicalFlag) { const uint32_t nr = insn.state.flag; const uint32_t subnr = insn.state.subFlag; return GenRegister::flag(nr, subnr); } else return GenRegister::uw1grf(ir::Register(insn.state.flagIndex)); } uint32_t 
DependencyTracker::getIndex(GenRegister reg) const { // Non GRF physical register if (reg.physical) { //GBE_ASSERT (reg.file == GEN_ARCHITECTURE_REGISTER_FILE); if(reg.file == GEN_ARCHITECTURE_REGISTER_FILE) { const uint32_t file = reg.nr & 0xf0; const uint32_t nr = reg.nr & 0x0f; if (file == GEN_ARF_FLAG) { const uint32_t subnr = reg.subnr / sizeof(uint16_t); GBE_ASSERT(nr < MAX_FLAG_REGISTER && (subnr == 0 || subnr == 1)); return grfNum + 2*nr + subnr; } else if (file == GEN_ARF_ACCUMULATOR) { GBE_ASSERT(nr < MAX_ACC_REGISTER); return grfNum + MAX_FLAG_REGISTER + nr; } else { NOT_SUPPORTED; return 0; } } else { const uint32_t simdWidth = scheduler.ctx.getSimdWidth(); return simdWidth == 8 ? reg.nr : reg.nr / 2; } } // We directly manipulate physical GRFs here else if (scheduler.policy == POST_ALLOC) { const GenRegister physical = scheduler.ctx.ra->genReg(reg); const uint32_t simdWidth = scheduler.ctx.getSimdWidth(); return simdWidth == 8 ? physical.nr : physical.nr / 2; } // We use virtual registers since allocation is not done yet else return reg.value.reg; } uint32_t DependencyTracker::getIndex(uint32_t bti) const { const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER; return bti == 0xfe ? 
memDelta + LOCAL_MEMORY : memDelta + GLOBAL_MEMORY; } void DependencyTracker::updateWrites(ScheduleDAGNode *node) { const SelectionInstruction &insn = node->insn; // Track writes in registers for (uint32_t dstID = 0; dstID < insn.dstNum; ++dstID) { const GenRegister dst = insn.dst(dstID); if (this->ignoreDependency(dst) == false) { const uint32_t index = this->getIndex(dst); this->nodes[index] = node; if (dst.isdf() || dst.isint64()) this->nodes[index + 1] = node; } } // Track writes in predicates if (insn.opcode == SEL_OP_CMP || insn.opcode == SEL_OP_I64CMP) { const uint32_t index = this->getIndex(getFlag(insn)); this->nodes[index] = node; } // Track writes in accumulators if (insn.state.accWrEnable) { const uint32_t index = this->getIndex(GenRegister::acc()); this->nodes[index] = node; } // Track writes in memory if (insn.isWrite()) { const uint32_t index = this->getIndex(insn.extra.function); this->nodes[index] = node; } if(insn.opcode == SEL_OP_SPILL_REG) { const uint32_t index = this->getIndex(0xff); this->nodes[index] = node; } // Consider barriers and wait write to memory if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || insn.opcode == SEL_OP_WAIT) { const uint32_t local = this->getIndex(0xfe); const uint32_t global = this->getIndex(0x00); this->nodes[local] = this->nodes[global] = node; } } /*! Kind-of roughly estimated latency. Nothing real here */ static uint32_t getLatencyGen7(const SelectionInstruction &insn) { #define DECL_GEN7_SCHEDULE(FAMILY, LATENCY, SIMD16, SIMD8)\ const uint32_t FAMILY##InstructionLatency = LATENCY; #include "gen_insn_gen7_schedule_info.hxx" #undef DECL_GEN7_SCHEDULE switch (insn.opcode) { #define DECL_SELECTION_IR(OP, FAMILY) case SEL_OP_##OP: return FAMILY##Latency; #include "backend/gen_insn_selection.hxx" #undef DECL_SELECTION_IR }; return 0; } /*! 
Throughput in cycles for SIMD8 or SIMD16 */ static uint32_t getThroughputGen7(const SelectionInstruction &insn, bool isSIMD8) { #define DECL_GEN7_SCHEDULE(FAMILY, LATENCY, SIMD16, SIMD8)\ const uint32_t FAMILY##InstructionThroughput = isSIMD8 ? SIMD8 : SIMD16; #include "gen_insn_gen7_schedule_info.hxx" #undef DECL_GEN7_SCHEDULE switch (insn.opcode) { #define DECL_SELECTION_IR(OP, FAMILY) case SEL_OP_##OP: return FAMILY##Throughput; #include "backend/gen_insn_selection.hxx" #undef DECL_SELECTION_IR }; return 0; } SelectionScheduler::SelectionScheduler(GenContext &ctx, Selection &selection, SchedulePolicy policy) : policy(policy), listPool(nextHighestPowerOf2(selection.getLargestBlockSize())), ctx(ctx), selection(selection), tracker(selection, *this) { this->clearLists(); } void SelectionScheduler::clearLists(void) { this->ready.fast_clear(); this->active.fast_clear(); } int32_t SelectionScheduler::buildDAG(SelectionBlock &bb) { nodePool.rewind(); listPool.rewind(); tracker.clear(); this->clearLists(); // Track write-after-write and read-after-write dependencies int32_t insnNum = 0; for (auto &insn : bb.insnList) { // Create a new node for this instruction ScheduleDAGNode *node = this->newScheduleDAGNode(insn); tracker.insnNodes[insnNum++] = node; // read-after-write in registers for (uint32_t srcID = 0; srcID < insn.srcNum; ++srcID) tracker.addDependency(node, insn.src(srcID)); // read-after-write for predicate if (insn.state.predicate != GEN_PREDICATE_NONE) tracker.addDependency(node, getFlag(insn)); // read-after-write in memory if (insn.isRead()) { const uint32_t index = tracker.getIndex(insn.extra.function); tracker.addDependency(node, index); } //read-after-write of scratch memory if (insn.opcode == SEL_OP_UNSPILL_REG) { const uint32_t index = tracker.getIndex(0xff); tracker.addDependency(node, index); } // Consider barriers and wait are reading memory (local and global) if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || insn.opcode == 
SEL_OP_WAIT) { const uint32_t local = tracker.getIndex(0xfe); const uint32_t global = tracker.getIndex(0x00); tracker.addDependency(node, local); tracker.addDependency(node, global); } // write-after-write in registers for (uint32_t dstID = 0; dstID < insn.dstNum; ++dstID) tracker.addDependency(node, insn.dst(dstID)); // write-after-write for predicate if (insn.opcode == SEL_OP_CMP || insn.opcode == SEL_OP_I64CMP) tracker.addDependency(node, getFlag(insn)); // write-after-write for accumulators if (insn.state.accWrEnable) tracker.addDependency(node, GenRegister::acc()); // write-after-write in memory if (insn.isWrite()) { const uint32_t index = tracker.getIndex(insn.extra.function); tracker.addDependency(node, index); } // write-after-write in scratch memory if (insn.opcode == SEL_OP_SPILL_REG) { const uint32_t index = tracker.getIndex(0xff); tracker.addDependency(node, index); } // Consider barriers and wait are writing memory (local and global) if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || insn.opcode == SEL_OP_WAIT) { const uint32_t local = tracker.getIndex(0xfe); const uint32_t global = tracker.getIndex(0x00); tracker.addDependency(node, local); tracker.addDependency(node, global); } // Track all writes done by the instruction tracker.updateWrites(node); } // Track write-after-read dependencies tracker.clear(); for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) { ScheduleDAGNode *node = tracker.insnNodes[insnID]; const SelectionInstruction &insn = node->insn; // write-after-read in registers for (uint32_t srcID = 0; srcID < insn.srcNum; ++srcID) tracker.addDependency(insn.src(srcID), node); // write-after-read for predicate if (insn.state.predicate != GEN_PREDICATE_NONE) tracker.addDependency(getFlag(insn), node); // write-after-read in memory if (insn.isRead()) { const uint32_t index = tracker.getIndex(insn.extra.function); tracker.addDependency(index, node); } // Consider barriers and wait are reading memory (local and global) if 
(insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || insn.opcode == SEL_OP_WAIT) { const uint32_t local = tracker.getIndex(0xfe); const uint32_t global = tracker.getIndex(0x00); tracker.addDependency(local, node); tracker.addDependency(global, node); } // Track all writes done by the instruction tracker.updateWrites(node); } // Make labels and branches non-schedulable (i.e. they act as barriers) for (int32_t insnID = 0; insnID < insnNum; ++insnID) { ScheduleDAGNode *node = tracker.insnNodes[insnID]; if (node->insn.isBranch() || node->insn.isLabel() || node->insn.opcode == SEL_OP_EOT) tracker.makeBarrier(insnID, insnNum); } // Build the initial ready list (should only be the label actually) for (int32_t insnID = 0; insnID < insnNum; ++insnID) { ScheduleDAGNode *node = tracker.insnNodes[insnID]; if (node->refNum == 0) { ScheduleListNode *listNode = this->newScheduleListNode(node); this->ready.push_back(listNode); } } return insnNum; } void SelectionScheduler::scheduleDAG(SelectionBlock &bb, int32_t insnNum) { uint32_t cycle = 0; const bool isSIMD8 = this->ctx.getSimdWidth() == 8; while (insnNum) { // Retire all the instructions that finished for (auto toRetireIt = active.begin(); toRetireIt != active.end();) { ScheduleDAGNode *toRetireNode = toRetireIt.node()->node; // Instruction is now complete if (toRetireNode->retiredCycle <= cycle) { toRetireIt = this->active.erase(toRetireIt); // Traverse all children and make them ready if no more dependency auto &children = toRetireNode->children; for (auto it = children.begin(); it != children.end();) { if (--it->node->refNum == 0) { ScheduleListNode *listNode = it.node(); it = children.erase(it); this->ready.push_back(listNode); } else ++it; } } // Get the next one else ++toRetireIt; } // Try to schedule something from the ready list intrusive_list::iterator toSchedule; if (policy == POST_ALLOC) // FIFO scheduling toSchedule = this->ready.begin(); else // LIFO scheduling toSchedule = this->ready.rbegin(); // 
toSchedule = this->ready.begin(); if (toSchedule != this->ready.end()) { // The instruction is instantaneously issued to simulate zero cycle // scheduling if (policy == POST_ALLOC) cycle += getThroughputGen7(toSchedule->node->insn, isSIMD8); this->ready.erase(toSchedule); this->active.push_back(toSchedule.node()); // When we schedule before allocation, instruction is instantaneously // ready. This allows to have a real LIFO strategy if (policy == POST_ALLOC) toSchedule->node->retiredCycle = cycle + getLatencyGen7(toSchedule->node->insn); else toSchedule->node->retiredCycle = cycle; bb.append(&toSchedule->node->insn); insnNum--; } else cycle++; } } BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, false); BVAR(OCL_PRE_ALLOC_INSN_SCHEDULE, false); void schedulePostRegAllocation(GenContext &ctx, Selection &selection) { if (OCL_POST_ALLOC_INSN_SCHEDULE) { SelectionScheduler scheduler(ctx, selection, POST_ALLOC); for (auto &bb : *selection.blockList) { const int32_t insnNum = scheduler.buildDAG(bb); bb.insnList.clear(); scheduler.scheduleDAG(bb, insnNum); } } } void schedulePreRegAllocation(GenContext &ctx, Selection &selection) { if (OCL_PRE_ALLOC_INSN_SCHEDULE) { SelectionScheduler scheduler(ctx, selection, PRE_ALLOC); for (auto &bb : *selection.blockList) { const int32_t insnNum = scheduler.buildDAG(bb); bb.insnList.clear(); scheduler.scheduleDAG(bb, insnNum); } } } } /* namespace gbe */ Release_v0.3/backend/src/backend/gen_insn_scheduling.hpp000066400000000000000000000026251223142177000234730ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_insn_scheduling.hpp * \author Benjamin Segovia */ #ifndef __GBE_GEN_INSN_SCHEDULING_HPP__ #define __GBE_GEN_INSN_SCHEDULING_HPP__ namespace gbe { class Selection; // Pre ISA code class GenContext; // Handle compilation for Gen /*! Schedule the code per basic block (tends to limit register number) */ void schedulePreRegAllocation(GenContext &ctx, Selection &selection); /*! Schedule the code per basic block (tends to deal with insn latency) */ void schedulePostRegAllocation(GenContext &ctx, Selection &selection); } /* namespace gbe */ #endif /* __GBE_GEN_INSN_SCHEDULING_HPP__ */ Release_v0.3/backend/src/backend/gen_insn_selection.cpp000066400000000000000000003416421223142177000233330ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_insn_selection.cpp * \author Benjamin Segovia */ /* This is the instruction selection code. First of all, this is a bunch of c++ * crap. 
Sorry if this is not that readable. Anyway, the goal here is to take
 * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
 * virtual registers (i.e. regular GenIR registers).
 *
 * Overall idea:
 * =============
 *
 * There are a lot of papers and research about that but I tried to keep it
 * simple. No dynamic programming, nothing like this. Just a recursive maximal
 * munch.
 *
 * Basically, the code is executed per basic block from bottom to top. Patterns
 * of GenIR instructions are defined and each instruction is matched against the
 * best pattern i.e. the pattern that catches the largest number of
 * instructions. Once matched, a sequence of instructions is output.
 *
 * Each instruction the match depends on is then marked as "root" i.e. we
 * indicate that each of these instructions must be generated: we indeed need their
 * destinations for the next instructions (remember that we generate the code in
 * reverse order)
 *
 * Patterns:
 * =========
 *
 * There are a lot of patterns and I did not implement all of them obviously. I
 * just quickly gathered the complete code to make pattern implementation kind of
 * easy. This is pretty verbose to add a pattern but it should not be too hard
 * to add new ones.
 *
 * To create and register patterns, I just abused C++ pre-main. A bunch of
 * patterns is then created and sorted per opcode (i.e. the opcode of the root
 * of the pattern): this creates a library of patterns that may be used at
 * run-time.
 *
 * Predication / Masking and CFG linearization
 * ===========================================
 *
 * The current version is based on an unfortunate choice. Basically, the problem
 * to solve is how to map unstructured branches (i.e. regular gotos) onto Gen.
 * Gen has native support for structured branches (if/else/endif/while...) but
 * nothing really native for unstructured branches.
 *
 * The idea we implemented is simple. We stole one flag register (here f0.0) to
 * mask all the instructions (and only activate the proper SIMD lanes) and we
 * use the CFG linearization technique to properly handle the control flow. This
 * is not really good for one particular reason: Gen instructions must use the
 * *same* flag register for the predicates (used for masking) and the
 * conditional modifier (used as a destination for CMP). This leads to extra
 * complications with compare instructions and select instructions. Basically,
 * we need to insert extra MOVs.
 *
 * Also, there is some extra kludge to handle the predicates for JMPI.
 *
 * See TODO for a better idea for branching and masking
 *
 * TODO:
 * =====
 *
 * Sadly, I recreated here a new DAG class. This is just a bad idea since we
 * already have the DAG per basic block with the Function graph i.e. the
 * complete graph of uses and definitions. I think we should be able to save a
 * lot of code here if we can simply reuse the code from UD / DU chains.
 *
 * Finally, cross-block instruction selection is quite possible with this simple
 * approach. Basically, instructions from dominating blocks could be merged and
 * matched with other instructions in the dominated block. This leads to the
 * interesting approach which consists in traversing the dominator tree in post
 * order
 *
 * About masking and branching, a much better idea (that I found later unfortunately)
 * is to replace the use of the flag by uses of if/endif to enclose the basic
 * block. So, instead of using predication, we use auto-masking. The very cool
 * consequence is that we can reintegrate back the structured branches.
* Basically, we will be able to identify branches that can be mapped to * structured branches and mix nicely unstructured branches (which will use * jpmi, if/endif to mask the blocks) and structured branches (which are pretty * fast) */ #include "backend/gen_insn_selection.hpp" #include "backend/gen_context.hpp" #include "ir/function.hpp" #include "ir/liveness.hpp" #include "ir/profile.hpp" #include "sys/cvar.hpp" #include "sys/vector.hpp" #include namespace gbe { /////////////////////////////////////////////////////////////////////////// // Helper functions /////////////////////////////////////////////////////////////////////////// uint32_t getGenType(ir::Type type) { using namespace ir; switch (type) { case TYPE_BOOL: return GEN_TYPE_UW; case TYPE_S8: return GEN_TYPE_B; case TYPE_U8: return GEN_TYPE_UB; case TYPE_S16: return GEN_TYPE_W; case TYPE_U16: return GEN_TYPE_UW; case TYPE_S32: return GEN_TYPE_D; case TYPE_U32: return GEN_TYPE_UD; case TYPE_S64: return GEN_TYPE_L; case TYPE_U64: return GEN_TYPE_UL; case TYPE_FLOAT: return GEN_TYPE_F; case TYPE_DOUBLE: return GEN_TYPE_DF; default: NOT_SUPPORTED; return GEN_TYPE_F; } } uint32_t getGenCompare(ir::Opcode opcode) { using namespace ir; switch (opcode) { case OP_LE: return GEN_CONDITIONAL_LE; case OP_LT: return GEN_CONDITIONAL_L; case OP_GE: return GEN_CONDITIONAL_GE; case OP_GT: return GEN_CONDITIONAL_G; case OP_EQ: return GEN_CONDITIONAL_EQ; case OP_NE: return GEN_CONDITIONAL_NEQ; default: NOT_SUPPORTED; return 0u; }; } /////////////////////////////////////////////////////////////////////////// // SelectionInstruction /////////////////////////////////////////////////////////////////////////// SelectionInstruction::SelectionInstruction(SelectionOpcode op, uint32_t dst, uint32_t src) : parent(NULL), opcode(op), dstNum(dst), srcNum(src) {} void SelectionInstruction::prepend(SelectionInstruction &other) { gbe::prepend(&other, this); other.parent = this->parent; } void 
SelectionInstruction::append(SelectionInstruction &other) { gbe::append(&other, this); other.parent = this->parent; } bool SelectionInstruction::isRead(void) const { return this->opcode == SEL_OP_UNTYPED_READ || this->opcode == SEL_OP_READ64 || this->opcode == SEL_OP_ATOMIC || this->opcode == SEL_OP_BYTE_GATHER; } bool SelectionInstruction::isWrite(void) const { return this->opcode == SEL_OP_UNTYPED_WRITE || this->opcode == SEL_OP_WRITE64 || this->opcode == SEL_OP_ATOMIC || this->opcode == SEL_OP_BYTE_SCATTER; } bool SelectionInstruction::isBranch(void) const { return this->opcode == SEL_OP_JMPI; } bool SelectionInstruction::isLabel(void) const { return this->opcode == SEL_OP_LABEL; } /////////////////////////////////////////////////////////////////////////// // SelectionVector /////////////////////////////////////////////////////////////////////////// SelectionVector::SelectionVector(void) : insn(NULL), reg(NULL), regNum(0), isSrc(0) {} /////////////////////////////////////////////////////////////////////////// // SelectionBlock /////////////////////////////////////////////////////////////////////////// SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb) {} void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); } void SelectionBlock::append(SelectionInstruction *insn) { this->insnList.push_back(insn); insn->parent = this; } void SelectionBlock::prepend(SelectionInstruction *insn) { this->insnList.push_front(insn); insn->parent = this; } void SelectionBlock::append(SelectionVector *vec) { this->vectorList.push_back(vec); } /////////////////////////////////////////////////////////////////////////// // Maximal munch selection on DAG /////////////////////////////////////////////////////////////////////////// /*! 
All instructions in a block are organized into a DAG */ class SelectionDAG { public: INLINE SelectionDAG(const ir::Instruction &insn) : insn(insn), mergeable(0), childNum(insn.getSrcNum()), isRoot(0) { for (uint32_t childID = 0; childID < childNum; ++childID) this->child[childID] = NULL; } /*! Mergeable are non-root instructions with valid sources */ INLINE void setAsMergeable(uint32_t which) { mergeable|=(1< opcodes; /*! Number of instruction generated */ uint32_t insnNum; /*! Cost of the pattern */ uint32_t cost; }; /*! Store and sort all the patterns. This is our global library we use for the * code selection */ class SelectionLibrary { public: /*! Will register all the patterns */ SelectionLibrary(void); /*! Release and destroy all the registered patterns */ ~SelectionLibrary(void); /*! Insert the given pattern for all associated opcodes */ template void insert(void); /*! One list of pattern per opcode */ typedef vector PatternList; /*! All lists of patterns properly sorted per opcode */ PatternList patterns[ir::OP_INVALID]; /*! All patterns to free */ vector toFree; }; /////////////////////////////////////////////////////////////////////////// // Code selection internal implementation /////////////////////////////////////////////////////////////////////////// /*! Actual implementation of the instruction selection engine */ class Selection::Opaque { public: /*! simdWidth is the default width for the instructions */ Opaque(GenContext &ctx); /*! Release everything */ virtual ~Opaque(void); /*! Implements the instruction selection itself */ void select(void); /*! Start a backward generation (from the end of the block) */ void startBackwardGeneration(void); /*! End backward code generation and output the code in the block */ void endBackwardGeneration(void); /*! Implement public class */ uint32_t getLargestBlockSize(void) const; /*! Implement public class */ INLINE uint32_t getVectorNum(void) const { return this->vectorNum; } /*! 
Implement public class */ INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID); /*! Implement public class */ INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID); /*! spill a register (insert spill/unspill instructions) */ INLINE void spillReg(ir::Register reg, uint32_t registerPool); /*! Implement public class */ INLINE uint32_t getRegNum(void) const { return file.regNum(); } /*! Implements public interface */ bool isScalarOrBool(ir::Register reg) const; /*! Implements public interface */ INLINE ir::RegisterData getRegisterData(ir::Register reg) const { return file.get(reg); } /*! Implement public class */ INLINE ir::RegisterFamily getRegisterFamily(ir::Register reg) const { return file.get(reg).family; } /*! Implement public class */ SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum); /*! Return the selection register from the GenIR one */ GenRegister selReg(ir::Register, ir::Type type = ir::TYPE_FLOAT) const; /*! Compute the nth register part when using SIMD8 with Qn (n in 2,3,4) */ GenRegister selRegQn(ir::Register, uint32_t quarter, ir::Type type = ir::TYPE_FLOAT) const; /*! Size of the stack (should be large enough) */ enum { MAX_STATE_NUM = 16 }; /*! Push the current instruction state */ INLINE void push(void) { assert(stateNum < MAX_STATE_NUM); stack[stateNum++] = curr; } /*! Pop the latest pushed state */ INLINE void pop(void) { assert(stateNum > 0); curr = stack[--stateNum]; } /*! Create a new register in the register file and append it in the * temporary list of the current block */ INLINE ir::Register reg(ir::RegisterFamily family) { GBE_ASSERT(block != NULL); const ir::Register reg = file.append(family); block->append(reg); return reg; } /*! Append a block at the block stream tail. It becomes the current block */ void appendBlock(const ir::BasicBlock &bb); /*! 
Append an instruction in the current block */ SelectionInstruction *appendInsn(SelectionOpcode, uint32_t dstNum, uint32_t srcNum); /*! Append a new vector of registers in the current block */ SelectionVector *appendVector(void); /*! Build a DAG for the basic block (return number of instructions) */ uint32_t buildBasicBlockDAG(const ir::BasicBlock &bb); /*! Perform the selection on the basic block */ void matchBasicBlock(uint32_t insnNum); /*! A root instruction needs to be generated */ bool isRoot(const ir::Instruction &insn) const; /*! To handle selection block allocation */ DECL_POOL(SelectionBlock, blockPool); /*! To handle selection instruction allocation */ LinearAllocator insnAllocator; /*! To handle selection vector allocation */ DECL_POOL(SelectionVector, vecPool); /*! Per register information used with top-down block sweeping */ vector regDAG; /*! Store one DAG per instruction */ vector insnDAG; /*! Owns this structure */ GenContext &ctx; /*! Tail of the code fragment for backward code generation */ intrusive_list bwdList; /*! List of emitted blocks */ intrusive_list blockList; /*! Currently processed block */ SelectionBlock *block; /*! Current instruction state to use */ GenInstructionState curr; /*! We append new registers so we duplicate the function register file */ ir::RegisterFile file; /*! State used to encode the instructions */ GenInstructionState stack[MAX_STATE_NUM]; /*! Maximum number of instructions in the basic blocks */ uint32_t maxInsnNum; /*! Speed up instruction dag allocation */ DECL_POOL(SelectionDAG, dagPool); /*! Total number of registers in the function we encode */ uint32_t regNum; /*! Number of states currently pushed */ uint32_t stateNum; /*! Number of vector allocated */ uint32_t vectorNum; /*! If true, generate code backward */ bool bwdCodeGeneration; /*! 
To make function prototypes more readable */ typedef const GenRegister &Reg; #define ALU1(OP) \ INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); } #define ALU1WithTemp(OP) \ INLINE void OP(Reg dst, Reg src, Reg temp) { ALU1WithTemp(SEL_OP_##OP, dst, src, temp); } #define ALU2(OP) \ INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(SEL_OP_##OP, dst, src0, src1); } #define ALU2WithTemp(OP) \ INLINE void OP(Reg dst, Reg src0, Reg src1, Reg temp) { ALU2WithTemp(SEL_OP_##OP, dst, src0, src1, temp); } #define ALU3(OP) \ INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); } #define I64Shift(OP) \ INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[7]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); } ALU1(MOV) ALU1WithTemp(MOV_DF) ALU1WithTemp(LOAD_DF_IMM) ALU1(LOAD_INT64_IMM) ALU1(RNDZ) ALU1(RNDE) ALU2(SEL) ALU2(SEL_INT64) ALU1(NOT) ALU2(AND) ALU2(OR) ALU2(XOR) ALU2(I64AND) ALU2(I64OR) ALU2(I64XOR) ALU2(SHR) ALU2(SHL) ALU2(RSR) ALU2(RSL) ALU2(ASR) ALU2(ADD) ALU2WithTemp(I64ADD) ALU2WithTemp(I64SUB) ALU2(MUL) ALU1(FRC) ALU1(RNDD) ALU1(RNDU) ALU2(MACH) ALU1(LZD) ALU3(MAD) ALU2WithTemp(MUL_HI) ALU1(FBH) ALU1(FBL) ALU2WithTemp(HADD) ALU2WithTemp(RHADD) ALU2(UPSAMPLE_SHORT) ALU2(UPSAMPLE_INT) ALU2(UPSAMPLE_LONG) ALU1WithTemp(CONVI_TO_I64) ALU1WithTemp(CONVF_TO_I64) ALU1(CONVI64_TO_I) I64Shift(I64SHL) I64Shift(I64SHR) I64Shift(I64ASR) #undef ALU1 #undef ALU1WithTemp #undef ALU2 #undef ALU2WithTemp #undef ALU3 #undef I64Shift /*! Convert 64-bit integer to 32-bit float */ void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]); /*! Saturated 64bit x*y + z */ void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]); /*! High 64bit of x*y */ void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]); /*! (x+y)>>1 without mod. overflow */ void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); /*! (x+y+1)>>1 without mod. 
overflow */ void I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); /*! Shift a 64-bit integer */ void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]); /*! Compare 64-bit integer */ void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]); /*! Saturated addition of 64-bit integer */ void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! Saturated subtraction of 64-bit integer */ void I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! Encode a barrier instruction */ void BARRIER(GenRegister src); /*! Encode a barrier instruction */ void FENCE(GenRegister dst); /*! Encode a label instruction */ void LABEL(ir::LabelIndex label); /*! Jump indexed instruction */ void JMPI(Reg src, ir::LabelIndex target); /*! Compare instructions */ void CMP(uint32_t conditional, Reg src0, Reg src1); /*! Select instruction with embedded comparison */ void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1); /* Constant buffer move instruction */ void INDIRECT_MOVE(Reg dst, Reg src); /*! EOT is used to finish GPGPU threads */ void EOT(void); /*! No-op */ void NOP(void); /*! Wait instruction (used for the barrier) */ void WAIT(void); /*! Atomic instruction */ void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, uint32_t bti); /*! Read 64 bits float/int array */ void READ64(Reg addr, Reg tempAddr, const GenRegister *dst, uint32_t elemNum, uint32_t valueNum, uint32_t bti); /*! Write 64 bits float/int array */ void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, const GenRegister *dst, uint32_t dstNum, uint32_t bti); /*! Untyped read (up to 4 elements) */ void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti); /*! Untyped write (up to 4 elements) */ void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t bti); /*! 
Byte gather (for unaligned bytes, shorts and ints) */ void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti); /*! Byte scatter (for unaligned bytes, shorts and ints) */ void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti); /*! DWord scatter (for constant cache read) */ void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti); /*! Extended math function (2 arguments) */ void MATH(Reg dst, uint32_t function, Reg src0, Reg src1); /*! Extended math function (1 argument) */ void MATH(Reg dst, uint32_t function, Reg src); /*! Encode unary instructions */ void ALU1(SelectionOpcode opcode, Reg dst, Reg src); /*! Encode unary with temp reg instructions */ void ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg temp); /*! Encode binary instructions */ void ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1); /*! Encode binary with temp reg instructions */ void ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp); /*! Encode ternary instructions */ void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2); /*! Encode sample instructions */ void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler); /*! Encode typed write instructions */ void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum, uint32_t bti); /*! Get image information */ void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti); /*! Multiply 64-bit integers */ void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! 64-bit integer division */ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]); /*! 64-bit integer remainder of division */ void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]); /*! 
Use custom allocators */
    GBE_CLASS(Opaque);
    friend class SelectionBlock;
    friend class SelectionInstruction;
  };

  ///////////////////////////////////////////////////////////////////////////
  // Helper function
  ///////////////////////////////////////////////////////////////////////////

  /*! Directly mark all sources as root (when no match is found) */
  static void markAllChildren(SelectionDAG &dag) {
    // Do not merge anything, so all sources become roots
    // (children may be NULL and are skipped in that case)
    for (uint32_t childID = 0; childID < dag.childNum; ++childID)
      if (dag.child[childID])
        dag.child[childID]->isRoot = 1;
  }

  /*! Helper function to figure if two sources are the same: they must name
   *  the same IR register and be produced by the same child DAG node
   */
  static bool sourceMatch(SelectionDAG *src0DAG, uint32_t src0ID,
                          SelectionDAG *src1DAG, uint32_t src1ID) {
    GBE_ASSERT(src0DAG && src1DAG);
    // Ensure they are the same physical registers
    const ir::Register src0 = src0DAG->insn.getSrc(src0ID);
    const ir::Register src1 = src1DAG->insn.getSrc(src1ID);
    if (src0 != src1)
      return false;
    // Ensure they contain the same values
    return src0DAG->child[src0ID] == src1DAG->child[src1ID];
  }

  /*! Build the selection state for the function held by ctx. maxInsnNum is
   *  the size of the largest basic block of the function; it sizes both the
   *  DAG pool and the instruction <-> DAG table. regDAG gets one entry per
   *  register of the function
   */
  Selection::Opaque::Opaque(GenContext &ctx) :
    ctx(ctx), block(NULL),
    curr(ctx.getSimdWidth()),
    file(ctx.getFunction().getRegisterFile()),
    maxInsnNum(ctx.getFunction().getLargestBlockSize()),
    dagPool(maxInsnNum),
    stateNum(0), vectorNum(0),
    bwdCodeGeneration(false)
  {
    const ir::Function &fn = ctx.getFunction();
    this->regNum = fn.regNum();
    this->regDAG.resize(regNum);
    this->insnDAG.resize(maxInsnNum);
  }

  /*! Release every selection block that was appended */
  Selection::Opaque::~Opaque(void) {
    for (auto it = blockList.begin(); it != blockList.end();) {
      SelectionBlock &block = *it;
      // Step to the next block before the current one is deleted
      ++it;
      this->deleteSelectionBlock(&block);
    }
  }

  /*! Allocate and construct a selection instruction. The allocation is sized
   *  for the instruction header plus (dstNum + srcNum) GenRegister entries
   */
  SelectionInstruction*
  Selection::Opaque::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum)
  {
    const size_t regSize = (dstNum+srcNum)*sizeof(GenRegister);
    const size_t size = sizeof(SelectionInstruction) + regSize;
    void *ptr = insnAllocator.allocate(size);
    return new (ptr) SelectionInstruction(opcode, dstNum, srcNum);
  }

  void
Selection::Opaque::startBackwardGeneration(void) {
    // From now on appendInsn() queues new instructions in bwdList instead of
    // appending them to the current block
    this->bwdCodeGeneration = true;
  }

  /*! Move every instruction queued in bwdList to the front of the current
   *  block (walking the list from its last element) and leave backward mode
   */
  void Selection::Opaque::endBackwardGeneration(void) {
    for (auto it = bwdList.rbegin(); it != bwdList.rend();) {
      SelectionInstruction &insn = *it;
      // Step the iterator before the element it pointed to is erased
      auto toRemoveIt = it--;
      bwdList.erase(toRemoveIt);
      this->block->prepend(&insn);
    }
    this->bwdCodeGeneration = false;
  }

  /*! Largest number of selection instructions found in one block */
  uint32_t Selection::Opaque::getLargestBlockSize(void) const {
    size_t maxInsnNum = 0;
    for (const auto &bb : blockList)
      maxInsnNum = std::max(maxInsnNum, bb.insnList.size());
    return uint32_t(maxInsnNum);
  }

  /*! Create the selection block for bb and make it the current block */
  void Selection::Opaque::appendBlock(const ir::BasicBlock &bb) {
    this->block = this->newSelectionBlock(&bb);
    this->blockList.push_back(this->block);
  }

  /*! Create an instruction and add it either to the backward list (backward
   *  generation) or to the end of the current block. The instruction gets
   *  the current instruction state
   */
  SelectionInstruction *Selection::Opaque::appendInsn(SelectionOpcode opcode,
                                                      uint32_t dstNum,
                                                      uint32_t srcNum)
  {
    GBE_ASSERT(this->block != NULL);
    SelectionInstruction *insn = this->create(opcode, dstNum, srcNum);
    if (this->bwdCodeGeneration)
      this->bwdList.push_back(insn);
    else
      this->block->append(insn);
    insn->state = this->curr;
    return insn;
  }

  /*! Create a vector attached to the most recently appended instruction and
   *  register it in the current block
   */
  SelectionVector *Selection::Opaque::appendVector(void) {
    GBE_ASSERT(this->block != NULL);
    SelectionVector *vector = this->newSelectionVector();

    // The owning instruction is the last one appended, wherever it was queued
    if (this->bwdCodeGeneration)
      vector->insn = this->bwdList.back();
    else
      vector->insn = this->block->insnList.back();
    this->block->append(vector);
    this->vectorNum++;
    return vector;
  }

  /*! Spill spilledReg to scratch memory. Every instruction reading it gets an
   *  UNSPILL_REG prepended and every instruction writing it gets a SPILL_REG
   *  appended. The affected operand is rewritten to the physical register
   *  registerPool + 1 + operand index; registerPool itself is recorded as the
   *  scratch message header
   */
  void Selection::Opaque::spillReg(ir::Register spilledReg, uint32_t registerPool) {
    assert(registerPool != 0);
    const uint32_t simdWidth = ctx.getSimdWidth();
    const uint32_t dstStart = registerPool + 1;
    const uint32_t srcStart = registerPool + 1;
    // Scratch slot shared by all spills / unspills of this register
    uint32_t ptr = ctx.allocateScratchMem(typeSize(GEN_TYPE_D)*simdWidth);

    for (auto &block : blockList)
      for (auto &insn : block.insnList) {
        // spill / unspill insn should be skipped when do spilling
        if(insn.opcode == SEL_OP_SPILL_REG
           || insn.opcode == SEL_OP_UNSPILL_REG)
          continue;

        const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;

        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
          const GenRegister selReg = insn.src(srcID);
          const ir::Register reg = selReg.reg();
          // Only virtual (not yet physical) GRF uses of the spilled register
          if(reg == spilledReg
             && selReg.file == GEN_GENERAL_REGISTER_FILE
             && selReg.physical == 0) {
            GBE_ASSERT(srcID < 5);
            SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0);
            unspill->state = GenInstructionState(simdWidth);
            unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
                                          srcStart+srcID, 0,
                                          selReg.type, selReg.vstride,
                                          selReg.width, selReg.hstride);
            GenRegister src = insn.src(srcID);
            // change nr/subnr, keep other register settings
            src.nr = srcStart+srcID; src.subnr=0; src.physical=1;
            insn.src(srcID) = src;
            unspill->extra.scratchOffset = ptr;
            unspill->extra.scratchMsgHeader = registerPool;
            insn.prepend(*unspill);
          }
        }

        for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
          const GenRegister selReg = insn.dst(dstID);
          const ir::Register reg = selReg.reg();
          // Only virtual (not yet physical) GRF defs of the spilled register
          if(reg == spilledReg
             && selReg.file == GEN_GENERAL_REGISTER_FILE
             && selReg.physical == 0) {
            GBE_ASSERT(dstID < 5);
            SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1);
            spill->state = GenInstructionState(simdWidth);
            spill->src(0) =GenRegister(GEN_GENERAL_REGISTER_FILE,
                                       dstStart + dstID, 0,
                                       selReg.type, selReg.vstride,
                                       selReg.width, selReg.hstride);
            GenRegister dst = insn.dst(dstID);
            // change nr/subnr, keep other register settings
            dst.physical =1; dst.nr = dstStart+dstID; dst.subnr = 0;
            insn.dst(dstID)= dst;
            spill->extra.scratchOffset = ptr;
            spill->extra.scratchMsgHeader = registerPool;
            insn.append(*spill);
          }
        }
      }
  }

  /*! Replace source regID of insn by a fresh DWORD temporary and prepend the
   *  MOV that fills the temporary from the original source. Returns the
   *  temporary. Side effect: the block owning insn becomes the current block
   */
  ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
    SelectionBlock *block = insn->parent;
    const uint32_t simdWidth = ctx.getSimdWidth();
    ir::Register tmp;

    // This will append the temporary register in the instruction block
    this->block = block;
    tmp = this->reg(ir::FAMILY_DWORD);

    // Generate the MOV instruction and replace the register in the instruction
    SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
    mov->src(0) =
GenRegister::retype(insn->src(regID), GEN_TYPE_F);
    mov->state = GenInstructionState(simdWidth);
    insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp);
    insn->prepend(*mov);

    return tmp;
  }

  /*! Replace destination regID of insn by a fresh temporary of the same
   *  register family and append the MOV that copies the temporary back to the
   *  original destination (DF typed for QWORD registers, F typed otherwise).
   *  Returns the temporary. Side effect: the block owning insn becomes the
   *  current block
   */
  ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) {
    SelectionBlock *block = insn->parent;
    uint32_t simdWidth = ctx.getSimdWidth();
    ir::Register tmp;
    ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family;
    int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F;
    GenRegister gr;

    // This will append the temporary register in the instruction block
    this->block = block;
    tmp = this->reg(f);

    // Generate the MOV instruction and replace the register in the instruction
    SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
    mov->dst(0) = GenRegister::retype(insn->dst(regID), genType);
    mov->state = GenInstructionState(simdWidth);
    gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp);
    insn->dst(regID) = mov->src(0) = gr;
    insn->append(*mov);
    return tmp;
  }

  /*! True for registers the context reports as scalar and for every register
   *  of the boolean family
   */
  bool Selection::Opaque::isScalarOrBool(ir::Register reg) const {
    if (ctx.isScalarReg(reg))
      return true;
    else {
      const ir::RegisterFamily family = file.get(reg).family;
      return family == ir::FAMILY_BOOL;
    }
  }

/* Return the Gen register for `reg`, built with the SIMD1 constructor for
 * scalar / boolean registers and with the SIMD8 or SIMD16 constructor
 * according to the SIMD width otherwise. Expects `reg`, `genType` and
 * `simdWidth` to be in scope
 */
#define SEL_REG(SIMD16, SIMD8, SIMD1) \
  if (ctx.sel->isScalarOrBool(reg) == true) \
    return GenRegister::retype(GenRegister::SIMD1(reg), genType); \
  else if (simdWidth == 8) \
    return GenRegister::retype(GenRegister::SIMD8(reg), genType); \
  else { \
    GBE_ASSERT (simdWidth == 16); \
    return GenRegister::retype(GenRegister::SIMD16(reg), genType); \
  }

  /*! Build the Gen register matching an IR register, typed with the Gen type
   *  corresponding to `type`. Every supported family returns from inside
   *  SEL_REG, so the code after the switch is not expected to be reached
   */
  GenRegister Selection::Opaque::selReg(ir::Register reg, ir::Type type) const {
    using namespace ir;
    const uint32_t genType = getGenType(type);
    const uint32_t simdWidth = ctx.getSimdWidth();
    const RegisterData data = file.get(reg);
    const RegisterFamily family = data.family;
    switch (family) {
      case FAMILY_BOOL: SEL_REG(uw1grf, uw1grf, uw1grf); break;
      case FAMILY_WORD: SEL_REG(uw16grf, uw8grf, uw1grf); break;
      case FAMILY_BYTE: SEL_REG(ub16grf, ub8grf, ub1grf); break;
      case FAMILY_DWORD: SEL_REG(f16grf, f8grf, f1grf); break;
      case FAMILY_QWORD: SEL_REG(df16grf, df8grf, df1grf); break;
      default: NOT_SUPPORTED;
    }
    GBE_ASSERT(false);
    return GenRegister();
  }

#undef SEL_REG

  /*! Same as selReg with the quarter control of the register set to q */
  GenRegister Selection::Opaque::selRegQn(ir::Register reg, uint32_t q, ir::Type type) const {
    GenRegister sreg = this->selReg(reg, type);
    sreg.quarter = q;
    return sreg;
  }

  /*! Syntactic sugar for method declaration */
  typedef const GenRegister &Reg;

  /*! Append a LABEL instruction carrying the label index */
  void Selection::Opaque::LABEL(ir::LabelIndex index) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_LABEL, 0, 0);
    insn->index = uint16_t(index);
  }

  /*! Append a BARRIER instruction with src as its only source */
  void Selection::Opaque::BARRIER(GenRegister src) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_BARRIER, 0, 1);
    insn->src(0) = src;
  }

  /*! Append a FENCE instruction with dst as its only destination */
  void Selection::Opaque::FENCE(GenRegister dst) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_FENCE, 1, 0);
    insn->dst(0) = dst;
  }

  /*! Append a JMPI instruction: src is its source, index the target label */
  void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
    insn->src(0) = src;
    insn->index = uint16_t(index);
  }

  /*! Append a CMP instruction; the conditional goes into extra.function */
  void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_CMP, 0, 2);
    insn->src(0) = src0;
    insn->src(1) = src1;
    insn->extra.function = conditional;
  }

  /*! Append a SEL_CMP instruction; the conditional goes into extra.function */
  void Selection::Opaque::SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_SEL_CMP, 1, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    insn->extra.function = conditional;
  }

  /*! Append an INDIRECT_MOVE instruction from src to dst */
  void Selection::Opaque::INDIRECT_MOVE(Reg dst, Reg src) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_INDIRECT_MOVE, 1, 1);
    insn->dst(0) = dst;
    insn->src(0) = src;
  }

  /*! Append an ATOMIC instruction. Only the first srcNum sources are stored;
   *  they form one source vector. The atomic function goes into
   *  extra.function and the bti into extra.elem
   */
  void Selection::Opaque::ATOMIC(Reg dst, uint32_t function,
                                     uint32_t srcNum, Reg src0,
                                     Reg src1, Reg src2, uint32_t bti) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, 1, srcNum);
    insn->dst(0) = dst;
insn->src(0) = src0;
    if(srcNum > 1) insn->src(1) = src1;
    if(srcNum > 2) insn->src(2) = src2;
    insn->extra.function = function;
    insn->extra.elem     = bti;
    SelectionVector *vector = this->appendVector();

    vector->regNum = srcNum;
    vector->reg = &insn->src(0);
    vector->isSrc = 1;
  }

  /*! Append an EOT instruction (no operand) */
  void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
  /*! Append a NOP instruction (no operand) */
  void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
  /*! Append a WAIT instruction (no operand) */
  void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }

  /* elemNum contains all the temporary register and the
     real destination registers.*/
  void Selection::Opaque::READ64(Reg addr,
                                 Reg tempAddr,
                                 const GenRegister *dst,
                                 uint32_t elemNum,
                                 uint32_t valueNum,
                                 uint32_t bti)
  {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum + 1, 1);
    SelectionVector *srcVector = this->appendVector();
    SelectionVector *dstVector = this->appendVector();

    /* temporary addr register is to be modified, set it to dst registers.*/
    insn->dst(0) = tempAddr;
    // Regular instruction to encode
    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
      insn->dst(elemID + 1) = dst[elemID];
    insn->src(0) = addr;
    insn->extra.function = bti;
    insn->extra.elem = valueNum;

    // Only the temporary registers need contiguous allocation
    // (the vector starts right after the temporary address in dst(0))
    dstVector->regNum = elemNum - valueNum;
    dstVector->isSrc = 0;
    dstVector->reg = &insn->dst(1);

    // Source cannot be scalar (yet)
    srcVector->regNum = 1;
    srcVector->isSrc = 1;
    srcVector->reg = &insn->src(0);
  }

  /*! Append an UNTYPED_READ of elemNum registers from addr. The bti goes into
   *  extra.function and the element count into extra.elem
   */
  void Selection::Opaque::UNTYPED_READ(Reg addr,
                                       const GenRegister *dst,
                                       uint32_t elemNum,
                                       uint32_t bti)
  {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_READ, elemNum, 1);
    SelectionVector *srcVector = this->appendVector();
    SelectionVector *dstVector = this->appendVector();

    // Regular instruction to encode
    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
      insn->dst(elemID) = dst[elemID];
    insn->src(0) = addr;
    insn->extra.function = bti;
    insn->extra.elem = elemNum;

    // Sends require contiguous allocation
    dstVector->regNum = elemNum;
    dstVector->isSrc = 0;
    dstVector->reg = &insn->dst(0);

    // Source cannot be scalar (yet)
    srcVector->regNum = 1;
    srcVector->isSrc = 1;
    srcVector->reg = &insn->src(0);
  }

  /* src holds the srcNum data registers (stored after the address in the
     source list); dst holds the dstNum registers stored as destinations.
     NOTE(review): dst presumably carries the address copy plus the
     temporaries - confirm against the caller. */
  void Selection::Opaque::WRITE64(Reg addr,
                                  const GenRegister *src,
                                  uint32_t srcNum,
                                  const GenRegister *dst,
                                  uint32_t dstNum,
                                  uint32_t bti)
  {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum + 1);
    SelectionVector *vector = this->appendVector();

    // Regular instruction to encode
    insn->src(0) = addr;
    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
      insn->src(elemID + 1) = src[elemID];
    for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
      insn->dst(elemID) = dst[elemID];
    insn->extra.function = bti;
    insn->extra.elem = srcNum;

    // Only the addr + temporary registers need to be contiguous.
    // NOTE(review): the vector covers destination registers yet is flagged
    // as a source vector (isSrc = 1) - confirm this is intended.
    vector->regNum = dstNum;
    vector->reg = &insn->dst(0);
    vector->isSrc = 1;
  }

  /*! Append an UNTYPED_WRITE of elemNum registers to addr. The address and
   *  the data registers form one source vector of elemNum + 1 registers
   */
  void Selection::Opaque::UNTYPED_WRITE(Reg addr,
                                        const GenRegister *src,
                                        uint32_t elemNum,
                                        uint32_t bti)
  {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, 0, elemNum+1);
    SelectionVector *vector = this->appendVector();

    // Regular instruction to encode
    insn->src(0) = addr;
    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
      insn->src(elemID+1) = src[elemID];
    insn->extra.function = bti;
    insn->extra.elem = elemNum;

    // Sends require contiguous allocation for the sources
    vector->regNum = elemNum+1;
    vector->reg = &insn->src(0);
    vector->isSrc = 1;
  }

  /*! Append a BYTE_GATHER of elemSize from addr into dst. The bti goes into
   *  extra.function and the element size into extra.elem
   */
  void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_GATHER, 1, 1);
    SelectionVector *srcVector = this->appendVector();
    SelectionVector *dstVector = this->appendVector();

    // Instruction to encode
    insn->src(0) = addr;
    insn->dst(0) = dst;
    insn->extra.function = bti;
    insn->extra.elem = elemSize;

    // byte gather requires vector in the sense that scalar are not allowed
    // (yet)
dstVector->regNum = 1;
    dstVector->isSrc = 0;
    dstVector->reg = &insn->dst(0);
    srcVector->regNum = 1;
    srcVector->isSrc = 1;
    srcVector->reg = &insn->src(0);
  }

  /*! Append a BYTE_SCATTER of elemSize. The address and the value form one
   *  source vector of two registers
   */
  void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_SCATTER, 0, 2);
    SelectionVector *vector = this->appendVector();

    // Instruction to encode
    insn->src(0) = addr;
    insn->src(1) = src;
    insn->extra.function = bti;
    insn->extra.elem = elemSize;

    // value and address are contiguous in the send
    vector->regNum = 2;
    vector->isSrc = 1;
    vector->reg = &insn->src(0);
  }

  /*! Append a DWORD_GATHER from addr into dst (no vector is registered) */
  void Selection::Opaque::DWORD_GATHER(Reg dst, Reg addr, uint32_t bti) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_DWORD_GATHER, 1, 1);

    insn->src(0) = addr;
    insn->dst(0) = dst;
    insn->extra.function = bti;
  }

  /*! Append a two-source MATH instruction; function goes into extra.function */
  void Selection::Opaque::MATH(Reg dst, uint32_t function, Reg src0, Reg src1) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_MATH, 1, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    insn->extra.function = function;
  }

  /*! Append a one-source MATH instruction; function goes into extra.function */
  void Selection::Opaque::MATH(Reg dst, uint32_t function, Reg src) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_MATH, 1, 1);
    insn->dst(0) = dst;
    insn->src(0) = src;
    insn->extra.function = function;
  }

  // In the 64-bit helpers below the temporaries are stored as extra
  // destinations, right after the real destination (when there is one)

  /*! Append an I64MUL instruction with 6 temporaries */
  void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i = 0; i < 6; i++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append an I64DIV instruction with 14 temporaries */
  void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 15, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i = 0; i < 14; i++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append an I64REM instruction with 14 temporaries */
  void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 15, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i = 0; i < 14; i++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append a one-destination, one-source instruction */
  void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
    SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
    insn->dst(0) = dst;
    insn->src(0) = src;
  }

  /*! Same as ALU1 with temp stored as a second destination */
  void Selection::Opaque::ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src, Reg temp) {
    SelectionInstruction *insn = this->appendInsn(opcode, 2, 1);
    insn->dst(0) = dst;
    insn->src(0) = src;
    insn->dst(1) = temp;
  }

  /*! Append a one-destination, two-source instruction */
  void Selection::Opaque::ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1) {
    SelectionInstruction *insn = this->appendInsn(opcode, 1, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
  }

  /*! Same as ALU2 with temp stored as a second destination */
  void Selection::Opaque::ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp) {
    SelectionInstruction *insn = this->appendInsn(opcode, 2, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    insn->dst(1) = temp;
  }

  /*! Append a one-destination, three-source instruction */
  void Selection::Opaque::ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2) {
    SelectionInstruction *insn = this->appendInsn(opcode, 1, 3);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    insn->src(2) = src2;
  }

  /*! Append an I64CMP instruction. It has no real destination: its three
   *  destinations are the temporaries. The conditional goes into
   *  extra.function
   */
  void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i=0; i<3; i++)
      insn->dst(i) = tmp[i];
    insn->extra.function = conditional;
  }

  /*! Append an I64SATADD instruction with 6 temporaries */
  void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATADD, 7, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i=0; i<6; i++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append an I64SATSUB instruction with 6 temporaries */
  void Selection::Opaque::I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATSUB, 7, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i=0; i<6; i++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append a CONVI64_TO_F instruction with 4 temporaries */
  void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 5, 1);
    insn->dst(0) = dst;
    insn->src(0) = src;
    for(int i = 0; i < 4; i ++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append an I64MADSAT instruction (three sources) with 10 temporaries */
  void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 11, 3);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    insn->src(2) = src2;
    for(int i = 0; i < 10; i ++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append an I64_MUL_HI instruction with 10 temporaries */
  void Selection::Opaque::I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64_MUL_HI, 11, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i = 0; i < 10; i ++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append an I64HADD instruction with 4 temporaries */
  void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i = 0; i < 4; i ++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append an I64RHADD instruction with 4 temporaries */
  void Selection::Opaque::I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64RHADD, 5, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i = 0; i < 4; i ++)
      insn->dst(i + 1) = tmp[i];
  }

  /*! Append a 64-bit shift instruction (opcode given by the caller) with 7
   *  temporaries
   */
  void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]) {
    SelectionInstruction *insn = this->appendInsn(opcode, 8, 2);
    insn->dst(0) = dst;
    insn->src(0) = src0;
    insn->src(1) = src1;
    for(int i = 0; i < 7; i ++)
      insn->dst(i + 1) = tmp[i];
  }

  // Boiler plate to initialize the selection library at c++ pre-main
  static SelectionLibrary *selLib = NULL;
  /*! Registered with atexit by the initializer below */
  static void destroySelectionLibrary(void) { GBE_DELETE(selLib); }
  /*! The constructor of the single static instance creates the library */
  static struct SelectionLibraryInitializer {
    SelectionLibraryInitializer(void) {
      selLib = GBE_NEW_NO_ARG(SelectionLibrary);
atexit(destroySelectionLibrary);
    }
  } selectionLibraryInitializer;

  /*! Tell whether the instruction must be the root of a selection DAG.
   *  NOTE(review): both isMemberOf() calls look identical in this copy; their
   *  template arguments seem to be missing - confirm against the upstream file
   */
  bool Selection::Opaque::isRoot(const ir::Instruction &insn) const {
    if (insn.getDstNum() > 1 ||
        insn.hasSideEffect() ||
        insn.isMemberOf() ||
        insn.isMemberOf())
      return true;

    // No side effect, not a branch and no destination? Impossible
    GBE_ASSERT(insn.getDstNum() == 1);

    // Root if alive outside the block.
    // XXX we should use Value and not registers in liveness info
    const ir::BasicBlock *insnBlock = insn.getParent();
    const ir::Liveness &liveness = this->ctx.getLiveness();
    const ir::Liveness::LiveOut &liveOut = liveness.getLiveOut(insnBlock);
    const ir::Register reg = insn.getDst(0);
    if (liveOut.contains(reg))
      return true;

    // The instruction is only used in the current basic block
    return false;
  }

  /*! Build one selection DAG node per instruction of the basic block and
   *  link each node to the nodes that produced its sources inside the block.
   *  Returns the number of instructions processed.
   *  NOTE(review): the const_cast below appears to have lost its template
   *  argument in this copy - confirm against the upstream file
   */
  uint32_t Selection::Opaque::buildBasicBlockDAG(const ir::BasicBlock &bb)
  {
    using namespace ir;

    // Clear all registers
    for (uint32_t regID = 0; regID < this->regNum; ++regID)
      this->regDAG[regID] = NULL;

    // Build the DAG on the fly
    uint32_t insnNum = 0;
    const_cast(bb).foreach([&](const Instruction &insn) {

      // Build a selectionDAG node for instruction
      SelectionDAG *dag = this->newSelectionDAG(insn);

      // Point to non-root children
      const uint32_t srcNum = insn.getSrcNum();
      for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
        const ir::Register reg = insn.getSrc(srcID);
        // Last node of this block that wrote the register (NULL if none)
        SelectionDAG *child = this->regDAG[reg];
        if (child) {
          const ir::Instruction &childInsn = child->insn;
          const uint32_t childSrcNum = childInsn.getSrcNum();

          // We can merge a child only if its sources are still valid
          // (none of them has been overwritten since the child was built)
          bool mergeable = true;
          for (uint32_t otherID = 0; otherID < childSrcNum; ++otherID) {
            const SelectionDAG *srcDAG = child->child[otherID];
            const ir::Register srcReg = childInsn.getSrc(otherID);
            SelectionDAG *currDAG = this->regDAG[srcReg];
            if (srcDAG != currDAG) {
              mergeable = false;
              break;
            }
          }
          if (mergeable) dag->setAsMergeable(srcID);
          dag->child[srcID] = child;
        } else
          dag->child[srcID] = NULL;
      }

      // Make it a root if we must
      if
(this->isRoot(insn)) dag->isRoot = 1; // Save the DAG <-> instruction mapping this->insnDAG[insnNum++] = dag; // Associate all output registers to this instruction const uint32_t dstNum = insn.getDstNum(); for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const ir::Register reg = insn.getDst(dstID); this->regDAG[reg] = dag; } }); return insnNum; } void Selection::Opaque::matchBasicBlock(uint32_t insnNum) { // Bottom up code generation for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) { // Process all possible patterns for this instruction SelectionDAG &dag = *insnDAG[insnID]; if (dag.isRoot) { const ir::Instruction &insn = dag.insn; const ir::Opcode opcode = insn.getOpcode(); auto it = selLib->patterns[opcode].begin(); const auto end = selLib->patterns[opcode].end(); // Start a new code fragment this->startBackwardGeneration(); // Try all the patterns from best to worst do { if ((*it)->emit(*this, dag)) break; ++it; } while (it != end); GBE_ASSERT(it != end); // Output the code in the current basic block this->endBackwardGeneration(); } } } void Selection::Opaque::select(void) { using namespace ir; const Function &fn = ctx.getFunction(); // Perform the selection per basic block fn.foreachBlock([&](const BasicBlock &bb) { this->dagPool.rewind(); this->appendBlock(bb); const uint32_t insnNum = this->buildBasicBlockDAG(bb); this->matchBasicBlock(insnNum); }); } void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler) { SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum + srcNum); SelectionVector *dstVector = this->appendVector(); SelectionVector *msgVector = this->appendVector(); // Regular instruction to encode for (uint32_t elemID = 0; elemID < dstNum; ++elemID) insn->dst(elemID) = dst[elemID]; for (uint32_t elemID = 0; elemID < msgNum; ++elemID) insn->src(elemID) = msgPayloads[elemID]; for (uint32_t elemID = 0; 
elemID < srcNum; ++elemID)
      insn->src(msgNum + elemID) = src[elemID];

    // Sends require contiguous allocation
    dstVector->regNum = dstNum;
    dstVector->isSrc = 0;
    dstVector->reg = &insn->dst(0);

    // Only the messages require contiguous registers.
    msgVector->regNum = msgNum;
    msgVector->isSrc = 1;
    msgVector->reg = &insn->src(0);

    insn->extra.function = bti;
    insn->extra.elem = sampler;
  }

  ///////////////////////////////////////////////////////////////////////////
  // Code selection public implementation
  ///////////////////////////////////////////////////////////////////////////

  /*! Create the opaque implementation. blockList stays NULL until select()
   *  has run
   */
  Selection::Selection(GenContext &ctx) {
    this->blockList = NULL;
    this->opaque = GBE_NEW(Selection::Opaque, ctx);
  }

  /*! Append a TYPED_WRITE instruction. The msgNum message registers are
   *  stored first in the source list, followed by the srcNum sources. The bti
   *  goes into extra.function and msgNum into extra.elem
   */
  void Selection::Opaque::TYPED_WRITE(GenRegister *src, uint32_t srcNum,
                                      GenRegister *msgs, uint32_t msgNum,
                                      uint32_t bti) {
    uint32_t elemID = 0;
    uint32_t i;
    SelectionInstruction *insn = this->appendInsn(SEL_OP_TYPED_WRITE, 0, msgNum + srcNum);
    SelectionVector *msgVector = this->appendVector();;

    // elemID keeps counting across both loops
    for( i = 0; i < msgNum; ++i, ++elemID)
      insn->src(elemID) = msgs[i];
    for (i = 0; i < srcNum; ++i, ++elemID)
      insn->src(elemID) = src[i];

    insn->extra.function = bti;
    insn->extra.elem = msgNum;
    // Sends require contiguous allocation
    msgVector->regNum = msgNum;
    msgVector->isSrc = 1;
    msgVector->reg = &insn->src(0);
  }

  /*! Append a GET_IMAGE_INFO instruction with dstNum destinations. The bti
   *  goes into extra.function and the info type into extra.elem
   */
  void Selection::Opaque::GET_IMAGE_INFO(uint32_t infoType, GenRegister *dst,
                                         uint32_t dstNum, uint32_t bti) {
    SelectionInstruction *insn = this->appendInsn(SEL_OP_GET_IMAGE_INFO, dstNum, 0);

    for(uint32_t i = 0; i < dstNum; ++i)
      insn->dst(i) = dst[i];

    insn->extra.function = bti;
    insn->extra.elem = infoType;
  }

  /*! Destroy the opaque implementation */
  Selection::~Selection(void) { GBE_DELETE(this->opaque); }

  /*! Run the selection and expose the resulting block list */
  void Selection::select(void) {
    this->opaque->select();
    this->blockList = &this->opaque->blockList;
  }

  // The methods below only forward to the opaque implementation

  bool Selection::isScalarOrBool(ir::Register reg) const {
    return this->opaque->isScalarOrBool(reg);
  }

  uint32_t Selection::getLargestBlockSize(void) const {
    return this->opaque->getLargestBlockSize();
  }

  uint32_t Selection::getVectorNum(void) const {
    return this->opaque->getVectorNum();
  }

  uint32_t Selection::getRegNum(void) const {
    return this->opaque->getRegNum();
  }

  ir::RegisterFamily Selection::getRegisterFamily(ir::Register reg) const {
    return this->opaque->getRegisterFamily(reg);
  }

  ir::RegisterData Selection::getRegisterData(ir::Register reg) const {
    return this->opaque->getRegisterData(reg);
  }

  ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
    return this->opaque->replaceSrc(insn, regID);
  }

  ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) {
    return this->opaque->replaceDst(insn, regID);
  }

  void Selection::spillReg(ir::Register reg, uint32_t registerPool) {
    this->opaque->spillReg(reg, registerPool);
  }

  SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
    return this->opaque->create(opcode, dstNum, srcNum);
  }

  ///////////////////////////////////////////////////////////////////////////
  // Implementation of all patterns
  ///////////////////////////////////////////////////////////////////////////

  /*! True when the immediate of the instruction can be turned into an
   *  immediate register, i.e. for every type but double and 64-bit integers.
   *  NOTE(review): the cast below appears to have lost its template argument
   *  in this copy - confirm against the upstream file
   */
  bool canGetRegisterFromImmediate(const ir::Instruction &insn) {
    using namespace ir;
    const auto &childInsn = cast(insn);
    const auto &imm = childInsn.getImmediate();
    if(imm.type != TYPE_DOUBLE && imm.type != TYPE_S64 && imm.type != TYPE_U64)
      return true;
    return false;
  }

  /*! Build the immediate register holding the value of imm. 8-bit values are
   *  encoded as 16-bit word immediates. Unsupported types hit NOT_SUPPORTED
   *  and fall back to a zero unsigned word
   */
  GenRegister getRegisterFromImmediate(ir::Immediate imm)
  {
    using namespace ir;
    switch (imm.type) {
      case TYPE_U32:   return GenRegister::immud(imm.data.u32);
      case TYPE_S32:   return GenRegister::immd(imm.data.s32);
      case TYPE_FLOAT: return GenRegister::immf(imm.data.f32);
      case TYPE_U16: return GenRegister::immuw(imm.data.u16);
      case TYPE_S16: return  GenRegister::immw(imm.data.s16);
      case TYPE_U8:  return GenRegister::immuw(imm.data.u8);
      case TYPE_S8:  return GenRegister::immw(imm.data.s8);
      case TYPE_DOUBLE: return GenRegister::immdf(imm.data.f64);
      default: NOT_SUPPORTED; return GenRegister::immuw(0);
    }
  }

  /*!
Template for the one-to-many instruction patterns */
  // NOTE(review): the template parameter lists of this class, of the macros
  // below and of the cast / isOpcodeFrom calls seem to be missing in this
  // copy - confirm against the upstream file
  template
  class OneToManyPattern : public SelectionPattern
  {
  public:
    /*! Register the pattern for all opcodes of the family */
    OneToManyPattern(uint32_t insnNum, uint32_t cost) :
      SelectionPattern(insnNum, cost)
    {
      for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
        if (ir::isOpcodeFrom(ir::Opcode(op)) == true)
          this->opcodes.push_back(ir::Opcode(op));
    }
    /*! Call the child method with the proper prototype */
    virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
      if (static_cast(this)->emitOne(sel, ir::cast(dag.insn))) {
        // Nothing was merged into this instruction: every source is a root
        markAllChildren(dag);
        return true;
      }
      return false;
    }
  };

/*! Declare a naive one-to-many pattern */
#define DECL_PATTERN(FAMILY) \
  struct FAMILY##Pattern : public OneToManyPattern

/*! Declare the constructor of a pattern opened with DECL_PATTERN */
#define DECL_CTOR(FAMILY, INSN_NUM, COST) \
  FAMILY##Pattern(void) : OneToManyPattern(INSN_NUM, COST) {}

  /*! Unary instruction patterns */
  DECL_PATTERN(UnaryInstruction)
  {
    /*! Type used to select both operand registers: 64-bit and 8-bit integer
     *  types are kept, FBH / FBL use U32, 16-bit integer types are kept and
     *  everything else is handled as float
     */
    static ir::Type getType(const ir::Opcode opcode, const ir::Type insnType) {
      if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == ir::TYPE_S8 || insnType == ir::TYPE_U8)
        return insnType;
      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL)
        return ir::TYPE_U32;
      if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16)
        return insnType;
      return ir::TYPE_FLOAT;
    }

    /*! Emit the selection instruction matching one unary IR instruction */
    INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const {
      const ir::Opcode opcode = insn.getOpcode();
      const ir::Type insnType = insn.getType();
      const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode, insnType));
      const GenRegister src = sel.selReg(insn.getSrc(0), getType(opcode, insnType));
      switch (opcode) {
        case ir::OP_ABS:
          // ABS is a MOV with the absolute value modifier on the source
          if (insn.getType() == ir::TYPE_S32) {
            const GenRegister src_ = GenRegister::retype(src, GEN_TYPE_D);
            const GenRegister dst_ = GenRegister::retype(dst, GEN_TYPE_D);
            sel.MOV(dst_, GenRegister::abs(src_));
          } else {
            GBE_ASSERT(insn.getType() == ir::TYPE_FLOAT);
            sel.MOV(dst, GenRegister::abs(src));
          }
          break;
        case ir::OP_MOV:
          // Double moves go through MOV_DF with a QWORD temporary
          if (dst.isdf()) {
            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
            sel.MOV_DF(dst, src, sel.selReg(r));
          } else
            sel.MOV(dst, src);
          break;
        case ir::OP_RNDD: sel.RNDD(dst, src); break;
        case ir::OP_RNDE: sel.RNDE(dst, src); break;
        case ir::OP_RNDU: sel.RNDU(dst, src); break;
        case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
        case ir::OP_FBH: sel.FBH(dst, src); break;
        case ir::OP_FBL: sel.FBL(dst, src); break;
        case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
        case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
        case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
        case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break;
        case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break;
        case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break;
        default: NOT_SUPPORTED;
      }
      return true;
    }
    DECL_CTOR(UnaryInstruction, 1, 1)
  };

  // Gates the use of immediate operands in the binary instruction pattern
  BVAR(OCL_OPTIMIZE_IMMEDIATE, true);

  /*! Binary regular instruction pattern */
  class BinaryInstructionPattern : public SelectionPattern
  {
  public:
    /*! Register the pattern for the opcodes accepted by isOpcodeFrom */
    BinaryInstructionPattern(void) : SelectionPattern(1,1) {
      for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
        if (ir::isOpcodeFrom(ir::Opcode(op)) == true)
          this->opcodes.push_back(ir::Opcode(op));
    }

    /*! Emit a division (op == OP_DIV) or a remainder (any other op) */
    bool emitDivRemInst(Selection::Opaque &sel, SelectionDAG &dag, ir::Opcode op) const
    {
      using namespace ir;
      const ir::BinaryInstruction &insn = cast(dag.insn);
      const Type type = insn.getType();
      GenRegister dst  = sel.selReg(insn.getDst(0), type);
      GenRegister src0 = sel.selReg(insn.getSrc(0), type);
      GenRegister src1 = sel.selReg(insn.getSrc(1), type);
      const uint32_t simdWidth = sel.curr.execWidth;
      const RegisterFamily family = getFamily(type);
      uint32_t function = (op == OP_DIV)?
GEN_MATH_FUNCTION_INT_DIV_QUOTIENT : GEN_MATH_FUNCTION_INT_DIV_REMAINDER; //bytes and shorts must be converted to int for DIV and REM per GEN restriction if((family == FAMILY_WORD || family == FAMILY_BYTE)) { GenRegister tmp0, tmp1; ir::Register reg = sel.reg(FAMILY_DWORD); tmp0 = GenRegister::udxgrf(simdWidth, reg); tmp0 = GenRegister::retype(tmp0, GEN_TYPE_D); sel.MOV(tmp0, src0); tmp1 = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); tmp1 = GenRegister::retype(tmp1, GEN_TYPE_D); sel.MOV(tmp1, src1); sel.MATH(tmp0, function, tmp0, tmp1); GenRegister unpacked; if(family == FAMILY_WORD) { unpacked = GenRegister::unpacked_uw(reg); } else { unpacked = GenRegister::unpacked_ub(reg); } unpacked = GenRegister::retype(unpacked, getGenType(type)); sel.MOV(dst, unpacked); } else if (type == TYPE_S32 || type == TYPE_U32 ) { sel.MATH(dst, function, src0, src1); } else if(type == TYPE_FLOAT) { GBE_ASSERT(op != OP_REM); sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1); } else if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[14]; for(int i=0; i<13; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } tmp[13] = sel.selReg(sel.reg(FAMILY_BOOL)); if(op == OP_DIV) sel.I64DIV(dst, src0, src1, tmp); else sel.I64REM(dst, src0, src1, tmp); } markAllChildren(dag); return true; } INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; const ir::BinaryInstruction &insn = cast(dag.insn); const Opcode opcode = insn.getOpcode(); const Type type = insn.getType(); GenRegister dst = sel.selReg(insn.getDst(0), type); if(opcode == OP_DIV || opcode == OP_REM) { return this->emitDivRemInst(sel, dag, opcode); } // Immediates not supported if (opcode == OP_POW) { GenRegister src0 = sel.selReg(insn.getSrc(0), type); GenRegister src1 = sel.selReg(insn.getSrc(1), type); if(type == TYPE_FLOAT) { sel.MATH(dst, GEN_MATH_FUNCTION_POW, src0, src1); } else { NOT_IMPLEMENTED; } markAllChildren(dag); return true; } sel.push(); // 
Boolean values use scalars if (sel.isScalarOrBool(insn.getDst(0)) == true) { sel.curr.execWidth = 1; sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; } // Look for immediate values GenRegister src0, src1; SelectionDAG *dag0 = dag.child[0]; SelectionDAG *dag1 = dag.child[1]; // Right source can always be an immediate if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) { const auto &childInsn = cast(dag1->insn); src0 = sel.selReg(insn.getSrc(0), type); src1 = getRegisterFromImmediate(childInsn.getImmediate()); if (dag0) dag0->isRoot = 1; } // Left source cannot be immediate but it is OK if we can commute else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag0->insn)) { const auto &childInsn = cast(dag0->insn); src0 = sel.selReg(insn.getSrc(1), type); src1 = getRegisterFromImmediate(childInsn.getImmediate()); if (dag1) dag1->isRoot = 1; } // Just grab the two sources else { src0 = sel.selReg(insn.getSrc(0), type); src1 = sel.selReg(insn.getSrc(1), type); markAllChildren(dag); } // Output the binary instruction switch (opcode) { case OP_ADD: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64); sel.I64ADD(dst, src0, src1, t); } else sel.ADD(dst, src0, src1); break; case OP_ADDSAT: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { GenRegister tmp[6]; for(int i=0; i<5; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL)); sel.I64SATADD(dst, src0, src1, tmp); break; } sel.push(); sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; sel.ADD(dst, src0, src1); sel.pop(); break; case OP_XOR: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) sel.I64XOR(dst, src0, src1); else sel.XOR(dst, src0, src1); break; case OP_OR: if (type == Type::TYPE_U64 || type 
== Type::TYPE_S64) sel.I64OR(dst, src0, src1); else sel.OR(dst, src0, src1); break; case OP_AND: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) sel.I64AND(dst, src0, src1); else sel.AND(dst, src0, src1); break; case OP_SUB: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64); sel.I64SUB(dst, src0, src1, t); } else sel.ADD(dst, src0, GenRegister::negate(src1)); break; case OP_SUBSAT: if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { GenRegister tmp[6]; for(int i=0; i<5; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL)); sel.I64SATSUB(dst, src0, src1, tmp); break; } sel.push(); sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; sel.ADD(dst, src0, GenRegister::negate(src1)); sel.pop(); break; case OP_SHL: if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[7]; for(int i = 0; i < 6; i ++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL)); sel.I64SHL(dst, src0, src1, tmp); } else sel.SHL(dst, src0, src1); break; case OP_SHR: if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[7]; for(int i = 0; i < 6; i ++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL)); sel.I64SHR(dst, src0, src1, tmp); } else sel.SHR(dst, src0, src1); break; case OP_ASR: if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[7]; for(int i = 0; i < 6; i ++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL)); sel.I64ASR(dst, src0, src1, tmp); } else sel.ASR(dst, src0, src1); break; case OP_MUL_HI: { GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_UD); sel.MUL_HI(dst, src0, src1, temp); break; } case OP_I64_MUL_HI: { GenRegister temp[10]; for(int i=0; i<9; i++) { temp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); temp[i].type = GEN_TYPE_UD; } temp[9] = 
sel.selReg(sel.reg(FAMILY_BOOL)); sel.I64_MUL_HI(dst, src0, src1, temp); break; } case OP_MUL: if (type == TYPE_U32 || type == TYPE_S32) { sel.pop(); return false; } else if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[6]; for(int i = 0; i < 6; i++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); sel.I64MUL(dst, src0, src1, tmp); } else sel.MUL(dst, src0, src1); break; case OP_HADD: { GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D); sel.HADD(dst, src0, src1, temp); break; } case OP_RHADD: { GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D); sel.RHADD(dst, src0, src1, temp); break; } case OP_I64HADD: { GenRegister tmp[4]; for(int i=0; i<4; i++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); sel.I64HADD(dst, src0, src1, tmp); break; } case OP_I64RHADD: { GenRegister tmp[4]; for(int i=0; i<4; i++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); sel.I64RHADD(dst, src0, src1, tmp); break; } case OP_UPSAMPLE_SHORT: sel.UPSAMPLE_SHORT(dst, src0, src1); break; case OP_UPSAMPLE_INT: sel.UPSAMPLE_INT(dst, src0, src1); break; case OP_UPSAMPLE_LONG: sel.UPSAMPLE_LONG(dst, src0, src1); break; default: NOT_IMPLEMENTED; } sel.pop(); return true; } }; /*! MAD pattern */ class MulAddInstructionPattern : public SelectionPattern { public: /*! Register the pattern for all opcodes of the family */ MulAddInstructionPattern(void) : SelectionPattern(2, 1) { this->opcodes.push_back(ir::OP_ADD); } /*! Implements base class */ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; // MAD tend to increase liveness of the sources (since there are three of // them). TODO refine this strategy. Well, we should be able at least to // evaluate per basic block register pressure and selectively enable // disable MADs if (sel.ctx.limitRegisterPressure) return false; // We are good to try. 
We need a MUL for one of the two sources const ir::BinaryInstruction &insn = cast(dag.insn); if (insn.getType() != TYPE_FLOAT) return false; SelectionDAG *child0 = dag.child[0]; SelectionDAG *child1 = dag.child[1]; const GenRegister dst = sel.selReg(insn.getDst(0), TYPE_FLOAT); if (child0 && child0->insn.getOpcode() == OP_MUL) { GBE_ASSERT(cast(child0->insn).getType() == TYPE_FLOAT); const GenRegister src0 = sel.selReg(child0->insn.getSrc(0), TYPE_FLOAT); const GenRegister src1 = sel.selReg(child0->insn.getSrc(1), TYPE_FLOAT); const GenRegister src2 = sel.selReg(insn.getSrc(1), TYPE_FLOAT); sel.MAD(dst, src2, src0, src1); // order different on HW! if (child0->child[0]) child0->child[0]->isRoot = 1; if (child0->child[1]) child0->child[1]->isRoot = 1; if (child1) child1->isRoot = 1; return true; } if (child1 && child1->insn.getOpcode() == OP_MUL) { GBE_ASSERT(cast(child1->insn).getType() == TYPE_FLOAT); const GenRegister src0 = sel.selReg(child1->insn.getSrc(0), TYPE_FLOAT); const GenRegister src1 = sel.selReg(child1->insn.getSrc(1), TYPE_FLOAT); const GenRegister src2 = sel.selReg(insn.getSrc(0), TYPE_FLOAT); sel.MAD(dst, src2, src0, src1); // order different on HW! if (child1->child[0]) child1->child[0]->isRoot = 1; if (child1->child[1]) child1->child[1]->isRoot = 1; if (child0) child0->isRoot = 1; return true; } return false; } }; /*! sel.{le,l,ge...} like patterns */ class SelectModifierInstructionPattern : public SelectionPattern { public: /*! Register the pattern for all opcodes of the family */ SelectModifierInstructionPattern(void) : SelectionPattern(2, 1) { this->opcodes.push_back(ir::OP_SEL); } /*! 
Implements base class */ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; SelectionDAG *cmp = dag.child[0]; const SelectInstruction &insn = cast(dag.insn); if (insn.getType() == TYPE_S64 || insn.getType() == TYPE_U64) // not support return false; // Not in this block if (cmp == NULL) return false; // We need to match a compare if (cmp->insn.isMemberOf() == false) return false; // We look for something like that: // cmp.{le,ge...} flag src0 src1 // sel dst flag src0 src1 // So both sources must match if (sourceMatch(cmp, 0, &dag, 1) == false) return false; if (sourceMatch(cmp, 1, &dag, 2) == false) return false; // OK, we merge the instructions const ir::CompareInstruction &cmpInsn = cast(cmp->insn); const ir::Opcode opcode = cmpInsn.getOpcode(); const uint32_t genCmp = getGenCompare(opcode); // Like for regular selects, we need a temporary since we cannot predicate // properly const ir::Type type = cmpInsn.getType(); const RegisterFamily family = getFamily(type); const GenRegister tmp = sel.selReg(sel.reg(family), type); const uint32_t simdWidth = sel.curr.execWidth; const GenRegister dst = sel.selReg(insn.getDst(0), type); const GenRegister src0 = sel.selReg(cmpInsn.getSrc(0), type); const GenRegister src1 = sel.selReg(cmpInsn.getSrc(1), type); sel.push(); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.execWidth = simdWidth; sel.SEL_CMP(genCmp, tmp, src0, src1); sel.pop(); // Update the destination register properly now sel.MOV(dst, tmp); // We need the sources of the compare instruction markAllChildren(*cmp); return true; } }; /*! 32 bits integer multiply needs more instructions */ class Int32x32MulInstructionPattern : public SelectionPattern { public: /*! Register the pattern for all opcodes of the family */ Int32x32MulInstructionPattern(void) : SelectionPattern(1, 4) { this->opcodes.push_back(ir::OP_MUL); } /*! 
Implements base class */ virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; const ir::BinaryInstruction &insn = cast(dag.insn); const uint32_t simdWidth = sel.curr.execWidth; const Type type = insn.getType(); if (type == TYPE_U32 || type == TYPE_S32) { GenRegister dst = sel.selReg(insn.getDst(0), type); GenRegister src0 = sel.selReg(insn.getSrc(0), type); GenRegister src1 = sel.selReg(insn.getSrc(1), type); sel.push(); // Either left part of the 16-wide register or just a simd 8 register dst = GenRegister::retype(dst, GEN_TYPE_D); src0 = GenRegister::retype(src0, GEN_TYPE_D); src1 = GenRegister::retype(src1, GEN_TYPE_D); sel.curr.execWidth = 8; sel.curr.quarterControl = GEN_COMPRESSION_Q1; sel.MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), src0, src1); sel.curr.accWrEnable = 1; sel.MACH(GenRegister::retype(GenRegister::null(), GEN_TYPE_D), src0, src1); sel.curr.accWrEnable = 0; sel.MOV(GenRegister::retype(dst, GEN_TYPE_F), GenRegister::acc()); // Right part of the 16-wide register now if (simdWidth == 16) { int predicate = sel.curr.predicate; int noMask = sel.curr.noMask; sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; const GenRegister nextSrc0 = sel.selRegQn(insn.getSrc(0), 1, TYPE_S32); const GenRegister nextSrc1 = sel.selRegQn(insn.getSrc(1), 1, TYPE_S32); sel.MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), nextSrc0, nextSrc1); sel.curr.accWrEnable = 1; sel.MACH(GenRegister::retype(GenRegister::null(), GEN_TYPE_D), nextSrc0, nextSrc1); sel.curr.accWrEnable = 0; sel.curr.quarterControl = GEN_COMPRESSION_Q2; if (predicate != GEN_PREDICATE_NONE || noMask != 1) { const ir::Register reg = sel.reg(FAMILY_DWORD); sel.MOV(GenRegister::f8grf(reg), GenRegister::acc()); sel.curr.noMask = noMask;; sel.curr.predicate = predicate; sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F), GenRegister::f8grf(reg)); } else sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F), 
GenRegister::acc()); } sel.pop(); // All children are marked as root markAllChildren(dag); return true; } else return false; } }; /*! 32x16 bits integer can be done in one instruction */ class Int32x16MulInstructionPattern : public SelectionPattern { public: /*! Register the pattern for all opcodes of the family */ Int32x16MulInstructionPattern(void) : SelectionPattern(1, 1) { this->opcodes.push_back(ir::OP_MUL); } bool is16BitSpecialReg(ir::Register reg) const { if (reg == ir::ocl::lid0 || reg == ir::ocl::lid1 || reg == ir::ocl::lid2 || reg == ir::ocl::lsize0 || reg == ir::ocl::lsize1|| reg == ir::ocl::lsize2) return true; else return false; } /*! Try to emit a multiply where child childID is a 16 immediate */ bool emitMulImmediate(Selection::Opaque &sel, SelectionDAG &dag, uint32_t childID) const { using namespace ir; const ir::BinaryInstruction &insn = cast(dag.insn); const Register dst = insn.getDst(0); const Register src1 = insn.getSrc(childID ^ 1); const SelectionDAG *src0DAG = dag.child[childID]; if (src0DAG != NULL) { if (src0DAG->insn.getOpcode() == OP_LOADI) { const auto &loadimm = cast(src0DAG->insn); const Immediate imm = loadimm.getImmediate(); const Type type = imm.type; GBE_ASSERT(type == TYPE_U32 || type == TYPE_S32); if (type == TYPE_U32 && imm.data.u32 <= 0xffff) { sel.MUL(sel.selReg(dst, type), sel.selReg(src1, type), GenRegister::immuw(imm.data.u32)); if (dag.child[childID ^ 1] != NULL) dag.child[childID ^ 1]->isRoot = 1; return true; } if (type == TYPE_S32 && (imm.data.s32 >= -32768 && imm.data.s32 <= 32767)) { sel.MUL(sel.selReg(dst, type), sel.selReg(src1, type), GenRegister::immw(imm.data.s32)); if (dag.child[childID ^ 1] != NULL) dag.child[childID ^ 1]->isRoot = 1; return true; } } } return false; } /*! 
Try to emit a multiply with a 16 bit special register */ bool emitMulSpecialReg(Selection::Opaque &sel, SelectionDAG &dag, uint32_t childID) const { using namespace ir; const BinaryInstruction &insn = cast(dag.insn); const Type type = insn.getType(); const Register dst = insn.getDst(0); const Register src0 = insn.getSrc(childID); const Register src1 = insn.getSrc(childID ^ 1); if (is16BitSpecialReg(src0)) { sel.MUL(sel.selReg(dst, type), sel.selReg(src1, type), sel.selReg(src0, TYPE_U32)); markAllChildren(dag); return true; } return false; } virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; const BinaryInstruction &insn = cast(dag.insn); const Type type = insn.getType(); if (type == TYPE_U32 || type == TYPE_S32) { if (this->emitMulSpecialReg(sel, dag, 0)) return true; if (this->emitMulSpecialReg(sel, dag, 1)) return true; if (this->emitMulImmediate(sel, dag, 0)) return true; if (this->emitMulImmediate(sel, dag, 1)) return true; } return false; } }; #define DECL_NOT_IMPLEMENTED_ONE_TO_MANY(FAMILY) \ struct FAMILY##Pattern : public OneToManyPattern\ {\ INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn) const {\ NOT_IMPLEMENTED;\ return false;\ }\ DECL_CTOR(FAMILY, 1, 1); \ } #undef DECL_NOT_IMPLEMENTED_ONE_TO_MANY /*! 
Load immediate pattern: writes the constant carried by a load-immediate IR
   *  instruction into its destination register */
  DECL_PATTERN(LoadImmInstruction)
  {
    /*! Select the code for one load-immediate instruction.
     *  \param sel  selection state the instructions are appended to
     *  \param insn the load-immediate instruction to lower
     *  \return always true
     */
    INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadImmInstruction &insn) const
    {
      using namespace ir;
      const Type immType = insn.getType();
      const Immediate value = insn.getImmediate();
      const GenRegister target = sel.selReg(insn.getDst(0), immType);
      // Source operand of the boolean MOV. It is only assigned on the
      // scalar/bool path below and only read by the TYPE_BOOL case
      GenRegister boolSrc;

      sel.push();
        const bool scalarDst = sel.isScalarOrBool(insn.getDst(0)) == true;
        if (scalarDst) {
          sel.curr.execWidth = 1;
          if (immType == TYPE_BOOL) {
            // The choice depends on the current predicate, so it has to be
            // made before the predicate is cleared just below
            if (!value.data.b)
              boolSrc = GenRegister::immuw(0x0);
            else if (sel.curr.predicate == GEN_PREDICATE_NONE)
              boolSrc = GenRegister::immuw(0xffff);
            else if (sel.curr.physicalFlag)
              boolSrc = GenRegister::flag(sel.curr.flag, sel.curr.subFlag);
            else
              boolSrc = sel.selReg(Register(sel.curr.flagIndex), TYPE_U16);
          }
          sel.curr.noMask = 1;
          sel.curr.predicate = GEN_PREDICATE_NONE;
        }

        switch (immType) {
          case TYPE_BOOL:
            sel.MOV(target, boolSrc);
            break;
          // Every 32 bit type goes through the f32 member of the immediate
          // with the destination viewed as float
          // NOTE(review): presumably relies on imm.data being a union so
          // that the raw bits are copied - confirm
          case TYPE_U32:
          case TYPE_S32:
          case TYPE_FLOAT: {
            const GenRegister asFloat = GenRegister::retype(target, GEN_TYPE_F);
            sel.MOV(asFloat, GenRegister::immf(value.data.f32));
            break;
          }
          case TYPE_U16:
            sel.MOV(target, GenRegister::immuw(value.data.u16));
            break;
          case TYPE_S16:
            sel.MOV(target, GenRegister::immw(value.data.s16));
            break;
          // 8 bit constants are emitted as 16 bit immediates
          case TYPE_U8:
            sel.MOV(target, GenRegister::immuw(value.data.u8));
            break;
          case TYPE_S8:
            sel.MOV(target, GenRegister::immw(value.data.s8));
            break;
          case TYPE_DOUBLE: {
            // The double load takes an extra QWORD temporary
            const GenRegister scratch = sel.selReg(sel.reg(FAMILY_QWORD));
            sel.LOAD_DF_IMM(target, GenRegister::immdf(value.data.f64), scratch);
            break;
          }
          case TYPE_S64:
            sel.LOAD_INT64_IMM(target, GenRegister::immint64(value.data.s64));
            break;
          case TYPE_U64:
            sel.LOAD_INT64_IMM(target, GenRegister::immint64(value.data.u64));
            break;
          default:
            NOT_SUPPORTED;
        }
      sel.pop();
      return true;
    }

    DECL_CTOR(LoadImmInstruction, 1,1);
  };

  /*! 
Sync instruction */ DECL_PATTERN(SyncInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const { using namespace ir; const ir::Register reg = sel.reg(FAMILY_DWORD); const uint32_t params = insn.getParameters(); if(params == syncGlobalBarrier) { const ir::Register fenceDst = sel.reg(FAMILY_DWORD); sel.FENCE(sel.selReg(fenceDst, ir::TYPE_U32)); } sel.push(); sel.curr.predicate = GEN_PREDICATE_NONE; // As only the payload.2 is used and all the other regions are ignored // SIMD8 mode here is safe. sel.curr.execWidth = 8; sel.curr.physicalFlag = 0; sel.curr.noMask = 1; // Copy barrier id from r0. sel.AND(GenRegister::ud8grf(reg), GenRegister::ud1grf(ir::ocl::barrierid), GenRegister::immud(0x0f000000)); // A barrier is OK to start the thread synchronization *and* SLM fence sel.BARRIER(GenRegister::f8grf(reg)); // Now we wait for the other threads sel.curr.execWidth = 1; sel.WAIT(); sel.pop(); return true; } DECL_CTOR(SyncInstruction, 1,1); }; INLINE uint32_t getByteScatterGatherSize(ir::Type type) { using namespace ir; switch (type) { case TYPE_DOUBLE: case TYPE_S64: case TYPE_U64: return GEN_BYTE_SCATTER_QWORD; case TYPE_FLOAT: case TYPE_U32: case TYPE_S32: return GEN_BYTE_SCATTER_DWORD; case TYPE_U16: case TYPE_S16: return GEN_BYTE_SCATTER_WORD; case TYPE_U8: case TYPE_S8: return GEN_BYTE_SCATTER_BYTE; default: NOT_SUPPORTED; return GEN_BYTE_SCATTER_BYTE; } } /*! 
Load instruction pattern */ DECL_PATTERN(LoadInstruction) { void emitUntypedRead(Selection::Opaque &sel, const ir::LoadInstruction &insn, GenRegister addr, uint32_t bti) const { using namespace ir; const uint32_t valueNum = insn.getValueNum(); vector dst(valueNum); for (uint32_t dstID = 0; dstID < valueNum; ++dstID) dst[dstID] = GenRegister::retype(sel.selReg(insn.getValue(dstID)), GEN_TYPE_F); sel.UNTYPED_READ(addr, dst.data(), valueNum, bti); } void emitDWordGather(Selection::Opaque &sel, const ir::LoadInstruction &insn, GenRegister addr, uint32_t bti) const { using namespace ir; const uint32_t valueNum = insn.getValueNum(); const uint32_t simdWidth = sel.ctx.getSimdWidth(); GBE_ASSERT(valueNum == 1); GenRegister dst = GenRegister::retype(sel.selReg(insn.getValue(0)), GEN_TYPE_F); // get dword based address GenRegister addrDW = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2)); sel.DWORD_GATHER(dst, addrDW, bti); } void emitRead64(Selection::Opaque &sel, const ir::LoadInstruction &insn, GenRegister addr, uint32_t bti) const { using namespace ir; const uint32_t valueNum = insn.getValueNum(); uint32_t dstID; /* XXX support scalar only right now. */ GBE_ASSERT(valueNum == 1); // The first 16 DWORD register space is for temporary usage at encode stage. uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? 
valueNum * 2 : valueNum; GenRegister dst[valueNum + tmpRegNum]; for (dstID = 0; dstID < tmpRegNum ; ++dstID) dst[dstID] = sel.selReg(sel.reg(FAMILY_DWORD)); for ( uint32_t valueID = 0; valueID < valueNum; ++dstID, ++valueID) dst[dstID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64); sel.READ64(addr, sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64), dst, valueNum + tmpRegNum, valueNum, bti); } void emitByteGather(Selection::Opaque &sel, const ir::LoadInstruction &insn, const uint32_t elemSize, GenRegister address, GenRegister value, uint32_t bti) const { using namespace ir; GBE_ASSERT(insn.getValueNum() == 1); const uint32_t simdWidth = sel.ctx.getSimdWidth(); // We need a temporary register if we read bytes or words Register dst = Register(value.value.reg); if (elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE) { dst = sel.reg(FAMILY_DWORD); sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti); } // Repack bytes or words using a converting mov instruction if (elemSize == GEN_BYTE_SCATTER_WORD) sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst)); else if (elemSize == GEN_BYTE_SCATTER_BYTE) sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst)); } void emitIndirectMove(Selection::Opaque &sel, const ir::LoadInstruction &insn, GenRegister address) const { using namespace ir; GBE_ASSERT(insn.getValueNum() == 1); //todo: handle vec later const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType()); const GenRegister src = address; sel.INDIRECT_MOVE(dst, src); } INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const { using namespace ir; const GenRegister address = sel.selReg(insn.getAddress()); const AddressSpace space = insn.getAddressSpace(); GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL || insn.getAddressSpace() == MEM_CONSTANT || insn.getAddressSpace() == MEM_PRIVATE || insn.getAddressSpace() == MEM_LOCAL); 
GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false); const Type type = insn.getValueType(); const uint32_t elemSize = getByteScatterGatherSize(type); if (insn.getAddressSpace() == MEM_CONSTANT) { // XXX TODO read 64bit constant through constant cache // Per HW Spec, constant cache messages can read at least DWORD data. // So, byte/short data type, we have to read through data cache. if(insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD) this->emitRead64(sel, insn, address, 0x2); else if(insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD) this->emitDWordGather(sel, insn, address, 0x2); else { const GenRegister value = sel.selReg(insn.getValue(0)); this->emitByteGather(sel, insn, elemSize, address, value, 0x2); } } else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD) this->emitRead64(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00); else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD) this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00); else { const GenRegister value = sel.selReg(insn.getValue(0)); this->emitByteGather(sel, insn, elemSize, address, value, space == MEM_LOCAL ? 0xfe : 0x01); } return true; } DECL_CTOR(LoadInstruction, 1, 1); }; /*! 
Store instruction pattern: picks the write message (64 bit write, untyped
   *  write or byte scatter) from the alignment flag and the element size of
   *  the store */
  DECL_PATTERN(StoreInstruction)
  {
    /*! Write all the values of the store with one untyped write.
     *  \param bti binding table index passed to the message
     */
    void emitUntypedWrite(Selection::Opaque &sel,
                          const ir::StoreInstruction &insn,
                          uint32_t bti) const
    {
      using namespace ir;
      const uint32_t valueNum = insn.getValueNum();
      const uint32_t addrID = ir::StoreInstruction::addressIndex;
      GenRegister addr;
      vector<GenRegister> value(valueNum);

      // Address and values are all viewed as float for the message
      addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);
      for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
        value[valueID] = GenRegister::retype(sel.selReg(insn.getValue(valueID)), GEN_TYPE_F);
      sel.UNTYPED_WRITE(addr, value.data(), valueNum, bti);
    }

    /*! Write one 64 bit value per lane. WRITE64 receives the value registers
     *  plus a separate array of DWORD temporaries whose first entry is for
     *  the temporary address register
     */
    void emitWrite64(Selection::Opaque &sel,
                     const ir::StoreInstruction &insn,
                     uint32_t bti) const
    {
      using namespace ir;
      const uint32_t valueNum = insn.getValueNum();
      const uint32_t addrID = ir::StoreInstruction::addressIndex;
      GenRegister addr;
      /* XXX support scalar only right now. */
      GBE_ASSERT(valueNum == 1);
      addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);

      // The first 16 DWORD register space is for temporary usage at encode stage.
      const uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? valueNum * 2 : valueNum;
      // Vectors (as in emitUntypedWrite) instead of variable length arrays,
      // which are not standard C++
      vector<GenRegister> src(valueNum);
      vector<GenRegister> dst(tmpRegNum + 1);
      /* dst 0 is for the temporary address register. */
      dst[0] = sel.selReg(sel.reg(FAMILY_DWORD));
      for (uint32_t tmpID = 0; tmpID < tmpRegNum; ++tmpID)
        dst[tmpID + 1] = sel.selReg(sel.reg(FAMILY_DWORD));

      for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
        src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
      sel.WRITE64(addr, src.data(), valueNum, dst.data(), tmpRegNum + 1, bti);
    }

    /*! Write one element per lane with a byte scatter. Words and bytes are
     *  first moved into a fresh DWORD register; any other element size is
     *  scattered straight from \p value
     */
    void emitByteScatter(Selection::Opaque &sel,
                         const ir::StoreInstruction &insn,
                         const uint32_t elemSize,
                         GenRegister addr,
                         GenRegister value,
                         uint32_t bti) const
    {
      using namespace ir;
      const uint32_t simdWidth = sel.ctx.getSimdWidth();
      // The register as handed in, before it is possibly replaced below
      const GenRegister dst = value;

      GBE_ASSERT(insn.getValueNum() == 1);
      if (elemSize == GEN_BYTE_SCATTER_WORD) {
        value = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
        sel.MOV(value, GenRegister::retype(dst, GEN_TYPE_UW));
      } else if (elemSize == GEN_BYTE_SCATTER_BYTE) {
        value = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
        sel.MOV(value, GenRegister::retype(dst, GEN_TYPE_UB));
      }
      sel.BYTE_SCATTER(addr, value, elemSize, bti);
    }

    /*! Select the code for one store. The binding table index is 0xfe for
     *  local memory and 0x01 otherwise.
     *  \return always true
     */
    INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn) const
    {
      using namespace ir;
      const AddressSpace space = insn.getAddressSpace();
      const uint32_t bti = space == MEM_LOCAL ? 0xfe : 0x01;
      const Type type = insn.getValueType();
      const uint32_t elemSize = getByteScatterGatherSize(type);
      if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
        this->emitWrite64(sel, insn, bti);
      else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
        this->emitUntypedWrite(sel, insn, bti);
      else {
        const GenRegister address = sel.selReg(insn.getAddress());
        const GenRegister value = sel.selReg(insn.getValue(0));
        this->emitByteScatter(sel, insn, elemSize, address, value, bti);
      }
      return true;
    }
    DECL_CTOR(StoreInstruction, 1, 1);
  };

  /*! 
Compare instruction pattern */ class CompareInstructionPattern : public SelectionPattern { public: CompareInstructionPattern(void) : SelectionPattern(1,1) { for (uint32_t op = 0; op < ir::OP_INVALID; ++op) if (ir::isOpcodeFrom(ir::Opcode(op)) == true) this->opcodes.push_back(ir::Opcode(op)); } INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; const ir::CompareInstruction &insn = cast(dag.insn); const Opcode opcode = insn.getOpcode(); const Type type = insn.getType(); const uint32_t genCmp = getGenCompare(opcode); const Register dst = insn.getDst(0); // Limit the compare to the active lanes. Use the same compare as for f0.0 sel.push(); const LabelIndex label = insn.getParent()->getLabelIndex(); const GenRegister blockip = sel.selReg(ocl::blockip, TYPE_U16); const GenRegister labelReg = GenRegister::immuw(label); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(dst); sel.CMP(GEN_CONDITIONAL_LE, blockip, labelReg); sel.pop(); // Look for immediate values for the right source GenRegister src0, src1; SelectionDAG *dag0 = dag.child[0]; SelectionDAG *dag1 = dag.child[1]; // Right source can always be an immediate if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) { const auto &childInsn = cast(dag1->insn); src0 = sel.selReg(insn.getSrc(0), type); src1 = getRegisterFromImmediate(childInsn.getImmediate()); if (dag0) dag0->isRoot = 1; } else { src0 = sel.selReg(insn.getSrc(0), type); src1 = sel.selReg(insn.getSrc(1), type); markAllChildren(dag); } sel.push(); sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(dst); if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[3]; for(int i=0; i<3; i++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); sel.I64CMP(genCmp, src0, src1, tmp); } else sel.CMP(genCmp, src0, src1); sel.pop(); return true; } }; /*! 
Convert instruction pattern */ DECL_PATTERN(ConvertInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn) const { using namespace ir; const Type dstType = insn.getDstType(); const Type srcType = insn.getSrcType(); const RegisterFamily dstFamily = getFamily(dstType); const RegisterFamily srcFamily = getFamily(srcType); const GenRegister dst = sel.selReg(insn.getDst(0), dstType); const GenRegister src = sel.selReg(insn.getSrc(0), srcType); if(insn.getOpcode() == ir::OP_SAT_CVT) { sel.push(); sel.curr.saturate = 1; } // We need two instructions to make the conversion if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) { GenRegister unpacked; if (dstFamily == FAMILY_WORD) { const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W; unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD)); unpacked = GenRegister::retype(unpacked, type); } else { const uint32_t type = dstType == TYPE_U8 ? 
GEN_TYPE_UB : GEN_TYPE_B; unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD)); unpacked = GenRegister::retype(unpacked, type); } if(srcFamily == FAMILY_QWORD) { GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD)); tmp.type = GEN_TYPE_D; sel.CONVI64_TO_I(tmp, src); sel.MOV(unpacked, tmp); } else sel.MOV(unpacked, src); sel.MOV(dst, unpacked); } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) { sel.CONVI64_TO_I(dst, src); } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) { GenRegister tmp[4]; for(int i=0; i<3; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } tmp[3] = sel.selReg(sel.reg(FAMILY_BOOL)); sel.CONVI64_TO_F(dst, src, tmp); } else if (dst.isdf()) { ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); sel.MOV_DF(dst, src, sel.selReg(r)); } else if (dst.isint64()) { switch(src.type) { case GEN_TYPE_F: sel.CONVF_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD))); break; case GEN_TYPE_DF: NOT_IMPLEMENTED; default: sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD))); } } else sel.MOV(dst, src); if(insn.getOpcode() == ir::OP_SAT_CVT) sel.pop(); return true; } DECL_CTOR(ConvertInstruction, 1, 1); }; /*! Convert instruction pattern */ DECL_PATTERN(AtomicInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::AtomicInstruction &insn) const { using namespace ir; const AtomicOps atomicOp = insn.getAtomicOpcode(); const AddressSpace space = insn.getAddressSpace(); const uint32_t bti = space == MEM_LOCAL ? 
0xfe : 0x01; const uint32_t srcNum = insn.getSrcNum(); const GenRegister src0 = sel.selReg(insn.getSrc(0), TYPE_U32); //address GenRegister src1 = src0, src2 = src0; if(srcNum > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32); if(srcNum > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32); GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32); GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp; sel.ATOMIC(dst, genAtomicOp, srcNum, src0, src1, src2, bti); return true; } DECL_CTOR(AtomicInstruction, 1, 1); }; /*! Select instruction pattern */ class SelectInstructionPattern : public SelectionPattern { public: SelectInstructionPattern(void) : SelectionPattern(1,1) { for (uint32_t op = 0; op < ir::OP_INVALID; ++op) if (ir::isOpcodeFrom(ir::Opcode(op)) == true) this->opcodes.push_back(ir::Opcode(op)); } INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; const ir::SelectInstruction &insn = cast(dag.insn); // Get all registers for the instruction const Type type = insn.getType(); const GenRegister dst = sel.selReg(insn.getDst(0), type); // Look for immediate values for the right source GenRegister src0, src1; SelectionDAG *dag0 = dag.child[0]; // source 0 is the predicate! 
SelectionDAG *dag1 = dag.child[1]; SelectionDAG *dag2 = dag.child[2]; // Right source can always be an immediate if (OCL_OPTIMIZE_IMMEDIATE && dag2 != NULL && dag2->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag2->insn)) { const auto &childInsn = cast(dag2->insn); src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type); src1 = getRegisterFromImmediate(childInsn.getImmediate()); if (dag0) dag0->isRoot = 1; if (dag1) dag1->isRoot = 1; } else { src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type); src1 = sel.selReg(insn.getSrc(SelectInstruction::src1Index), type); markAllChildren(dag); } // Since we cannot predicate the select instruction with our current mask, // we need to perform the selection in two steps (one to select, one to // update the destination register) const RegisterFamily family = getFamily(type); const GenRegister tmp = sel.selReg(sel.reg(family), type); const uint32_t simdWidth = sel.ctx.getSimdWidth(); const Register pred = insn.getPredicate(); sel.push(); sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.curr.execWidth = simdWidth; sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(pred); sel.curr.noMask = 0; if(type == ir::TYPE_S64 || type == ir::TYPE_U64) sel.SEL_INT64(tmp, src0, src1); else sel.SEL(tmp, src0, src1); sel.pop(); // Update the destination register properly now sel.MOV(dst, tmp); return true; } }; DECL_PATTERN(TernaryInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn) const { using namespace ir; const Type type = insn.getType(); const GenRegister dst = sel.selReg(insn.getDst(0), type), src0 = sel.selReg(insn.getSrc(0), type), src1 = sel.selReg(insn.getSrc(1), type), src2 = sel.selReg(insn.getSrc(2), type); switch(insn.getOpcode()) { case OP_I64MADSAT: { GenRegister tmp[10]; for(int i=0; i<9; i++) { tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); tmp[i].type = GEN_TYPE_UD; } tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL)); sel.I64MADSAT(dst, src0, src1, 
src2, tmp); break; } default: NOT_IMPLEMENTED; } return true; } DECL_CTOR(TernaryInstruction, 1, 1); }; /*! Label instruction pattern */ DECL_PATTERN(LabelInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::LabelInstruction &insn) const { using namespace ir; const LabelIndex label = insn.getLabelIndex(); const GenRegister src0 = sel.selReg(ocl::blockip); const GenRegister src1 = GenRegister::immuw(label); const uint32_t simdWidth = sel.ctx.getSimdWidth(); sel.LABEL(label); // Do not emit any code for the "returning" block. There is no need for it if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock()) return true; // Emit the mask computation at the head of each basic block sel.push(); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.flag = 0; sel.curr.subFlag = 0; sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1); sel.pop(); // If it is required, insert a JUMP to bypass the block if (sel.ctx.hasJIP(&insn)) { const LabelIndex jip = sel.ctx.getLabelIndex(&insn); sel.push(); if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; else if (simdWidth == 16) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; else NOT_IMPLEMENTED; sel.curr.inversePredicate = 1; sel.curr.execWidth = 1; sel.curr.flag = 0; sel.curr.subFlag = 0; sel.curr.noMask = 1; sel.JMPI(GenRegister::immd(0), jip); sel.pop(); } return true; } DECL_CTOR(LabelInstruction, 1, 1); }; DECL_PATTERN(SampleInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::SampleInstruction &insn) const { using namespace ir; GenRegister msgPayloads[4]; GenRegister dst[insn.getDstNum()], src[insn.getSrcNum() - 2]; uint32_t srcNum = insn.getSrcNum(); uint32_t samplerOffset = 0; if (srcNum == 6) { /* We have the clamp border workaround. 
*/ samplerOffset = insn.getSrc(srcNum - 1).value() * 8; srcNum--; } for( int i = 0; i < 4; ++i) msgPayloads[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); for (uint32_t valueID = 0; valueID < insn.getDstNum(); ++valueID) dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType()); for (uint32_t valueID = 0; valueID < srcNum - 2; ++valueID) src[valueID] = sel.selReg(insn.getSrc(valueID + 2), insn.getSrcType()); uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx (insn.getSrc(SampleInstruction::SURFACE_BTI)); uint32_t sampler = sel.ctx.getFunction().getSamplerSet()->getIdx (insn.getSrc(SampleInstruction::SAMPLER_BTI)) + samplerOffset; sel.SAMPLE(dst, insn.getDstNum(), src, srcNum - 2, msgPayloads, 4, bti, sampler); return true; } DECL_CTOR(SampleInstruction, 1, 1); }; /*! Typed write instruction pattern. */ DECL_PATTERN(TypedWriteInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::TypedWriteInstruction &insn) const { using namespace ir; const uint32_t simdWidth = sel.ctx.getSimdWidth(); uint32_t valueID = 0; GenRegister msgs[9]; // (header + U + V + R + LOD + 4) GenRegister src[insn.getSrcNum()]; uint32_t msgNum = (8 / (simdWidth / 8)) + 1; uint32_t coordNum = (insn.getSrcNum() == 7) ? 2 : 3; for(uint32_t i = 0; i < msgNum; i++) msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); // u, v, w coords should use coord type. for (; valueID < coordNum; ++valueID) src[valueID] = sel.selReg(insn.getSrc(valueID + 1), insn.getCoordType()); for (; (valueID + 1) < insn.getSrcNum(); ++valueID) src[valueID] = sel.selReg(insn.getSrc(valueID + 1), insn.getSrcType()); uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx (insn.getSrc(TypedWriteInstruction::SURFACE_BTI)); sel.TYPED_WRITE(src, insn.getSrcNum() - 1, msgs, msgNum, bti); return true; } DECL_CTOR(TypedWriteInstruction, 1, 1); }; /*! get image info instruction pattern. 
*/ DECL_PATTERN(GetImageInfoInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn) const { using namespace ir; GenRegister dst; dst = sel.selReg(insn.getDst(0), TYPE_U32); GenRegister imageInfoReg = GenRegister::ud1grf(insn.getSrc(0)); sel.MOV(dst, imageInfoReg); return true; } DECL_CTOR(GetImageInfoInstruction, 1, 1); }; /*! get sampler info instruction pattern. */ DECL_PATTERN(GetSamplerInfoInstruction) { INLINE bool emitOne(Selection::Opaque &sel, const ir::GetSamplerInfoInstruction &insn) const { using namespace ir; GenRegister dst, src; dst = sel.selReg(insn.getDst(0), TYPE_U16); src = GenRegister::offset(GenRegister::uw1grf(ocl::samplerinfo), 0, sel.ctx.getFunction().getSamplerSet()->getIdx(insn.getSrc(0)) * 2); src.subphysical = 1; sel.MOV(dst, src); return true; } DECL_CTOR(GetSamplerInfoInstruction, 1, 1); }; /*! Branch instruction pattern */ DECL_PATTERN(BranchInstruction) { void emitForwardBranch(Selection::Opaque &sel, const ir::BranchInstruction &insn, ir::LabelIndex dst, ir::LabelIndex src) const { using namespace ir; const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); const LabelIndex jip = sel.ctx.getLabelIndex(&insn); const uint32_t simdWidth = sel.ctx.getSimdWidth(); // We will not emit any jump if we must go the next block anyway const BasicBlock *curr = insn.getParent(); const BasicBlock *next = curr->getNextBlock(); const LabelIndex nextLabel = next->getLabelIndex(); if (insn.isPredicated() == true) { const Register pred = insn.getPredicateIndex(); // Update the PcIPs sel.push(); sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(pred); sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); sel.pop(); if (nextLabel == jip) return; // It is slightly more complicated than for backward jump. 
We check that // all PcIPs are greater than the next block IP to be sure that we can // jump sel.push(); sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(pred); sel.curr.predicate = GEN_PREDICATE_NONE; sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel)); // Branch to the jump target // XXX TODO: For group size not aligned to simdWidth, ALL8/16h may not // work correct, as flag register bits mapped to non-active lanes tend // to be zero. if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; else if (simdWidth == 16) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H; else NOT_SUPPORTED; sel.curr.execWidth = 1; sel.curr.noMask = 1; sel.JMPI(GenRegister::immd(0), jip); sel.pop(); } else { // Update the PcIPs sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); // Do not emit branch when we go to the next block anyway if (nextLabel == jip) return; sel.push(); sel.curr.execWidth = 1; sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; sel.JMPI(GenRegister::immd(0), jip); sel.pop(); } } void emitBackwardBranch(Selection::Opaque &sel, const ir::BranchInstruction &insn, ir::LabelIndex dst, ir::LabelIndex src) const { using namespace ir; const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16); const Function &fn = sel.ctx.getFunction(); const BasicBlock &bb = fn.getBlock(src); const LabelIndex jip = sel.ctx.getLabelIndex(&insn); const uint32_t simdWidth = sel.ctx.getSimdWidth(); GBE_ASSERT(bb.getNextBlock() != NULL); if (insn.isPredicated() == true) { const Register pred = insn.getPredicateIndex(); // Update the PcIPs for all the branches. Just put the IPs of the next // block. 
Next instruction will properly reupdate the IPs of the lanes // that actually take the branch const LabelIndex next = bb.getNextBlock()->getLabelIndex(); sel.MOV(ip, GenRegister::immuw(uint16_t(next))); sel.push(); // Re-update the PcIPs for the branches that takes the backward jump sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(pred); sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); // Branch to the jump target if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; else if (simdWidth == 16) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; else NOT_SUPPORTED; sel.curr.execWidth = 1; sel.curr.noMask = 1; sel.JMPI(GenRegister::immd(0), jip); sel.pop(); } else { // Update the PcIPs sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); // Branch to the jump target sel.push(); sel.curr.execWidth = 1; sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; sel.JMPI(GenRegister::immd(0), jip); sel.pop(); } } INLINE bool emitOne(Selection::Opaque &sel, const ir::BranchInstruction &insn) const { using namespace ir; const Opcode opcode = insn.getOpcode(); if (opcode == OP_RET) sel.EOT(); else if (opcode == OP_BRA) { const LabelIndex dst = insn.getLabelIndex(); const LabelIndex src = insn.getParent()->getLabelIndex(); // We handle foward and backward branches differently if (uint32_t(dst) <= uint32_t(src)) this->emitBackwardBranch(sel, insn, dst, src); else this->emitForwardBranch(sel, insn, dst, src); } else NOT_IMPLEMENTED; return true; } DECL_CTOR(BranchInstruction, 1, 1); }; /*! 
Sort patterns */ INLINE bool cmp(const SelectionPattern *p0, const SelectionPattern *p1) { if (p0->insnNum != p1->insnNum) return p0->insnNum > p1->insnNum; return p0->cost < p1->cost; } SelectionLibrary::SelectionLibrary(void) { this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); this->insert(); // Sort all the patterns with the number of instructions they output for (uint32_t op = 0; op < ir::OP_INVALID; ++op) std::sort(this->patterns[op].begin(), this->patterns[op].end(), cmp); } SelectionLibrary::~SelectionLibrary(void) { for (auto pattern : this->toFree) GBE_DELETE(const_cast(pattern)); } template void SelectionLibrary::insert(void) { const SelectionPattern *pattern = GBE_NEW_NO_ARG(PatternType); this->toFree.push_back(pattern); for (auto opcode : pattern->opcodes) this->patterns[opcode].push_back(pattern); } } /* namespace gbe */ Release_v0.3/backend/src/backend/gen_insn_selection.hpp000066400000000000000000000175571223142177000233450ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file gen_insn_selection.hpp * \author Benjamin Segovia */ #ifndef __GEN_INSN_SELECTION_HPP__ #define __GEN_INSN_SELECTION_HPP__ #include "ir/register.hpp" #include "ir/instruction.hpp" #include "backend/gen_register.hpp" #include "backend/gen_encoder.hpp" #include "backend/gen_context.hpp" #include "sys/vector.hpp" #include "sys/intrusive_list.hpp" namespace gbe { /*! Translate IR type to Gen type */ uint32_t getGenType(ir::Type type); /*! Translate IR compare to Gen compare */ uint32_t getGenCompare(ir::Opcode opcode); /*! Selection opcodes properly encoded from 0 to n for fast jump tables * generations */ enum SelectionOpcode { #define DECL_SELECTION_IR(OP, FN) SEL_OP_##OP, #include "backend/gen_insn_selection.hxx" #undef DECL_SELECTION_IR }; // Owns and Allocates selection instructions class Selection; // List of SelectionInstruction forms a block class SelectionBlock; /*! A selection instruction is also almost a Gen instruction but *before* the * register allocation */ class SelectionInstruction : public NonCopyable, public intrusive_list_node { public: /*! Owns the instruction */ SelectionBlock *parent; /*! Append an instruction before this one */ void prepend(SelectionInstruction &insn); /*! Append an instruction after this one */ void append(SelectionInstruction &insn); /*! Does it read memory? */ bool isRead(void) const; /*! Does it write memory? */ bool isWrite(void) const; /*! Is it a branch instruction (i.e. modify control flow) */ bool isBranch(void) const; /*! Is it a label instruction (i.e. change the implicit mask) */ bool isLabel(void) const; /*! Get the destination register */ GenRegister &dst(uint32_t dstID) { return regs[dstID]; } /*! Get the source register */ GenRegister &src(uint32_t srcID) { return regs[dstNum+srcID]; } /*! Damn C++ */ const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; } /*! Damn C++ */ const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; } /*! 
No more than 17 sources (used by typed writes on simd8 mode.) */ enum { MAX_SRC_NUM = 17 }; /*! No more than 11 destinations (used by samples and untyped reads) */ enum { MAX_DST_NUM = 11 }; /*! State of the instruction (extra fields neeed for the encoding) */ GenInstructionState state; union { struct { /*! Store bti for loads/stores and function for math, atomic and compares */ uint16_t function:8; /*! elemSize for byte scatters / gathers, elemNum for untyped msg, bti for atomic */ uint16_t elem:8; }; struct { /*! Number of sources in the tuple */ uint16_t width:4; /*! vertical stride (0,1,2,4,8 or 16) */ uint16_t vstride:5; /*! horizontal stride (0,1,2,4,8 or 16) */ uint16_t hstride:5; /*! offset (0 to 7) */ uint16_t offset:5; }; struct { uint16_t scratchOffset; uint16_t scratchMsgHeader; }; } extra; /*! Gen opcode */ uint8_t opcode; /*! Number of destinations */ uint8_t dstNum:4; /*! Number of sources */ uint8_t srcNum:5; /*! To store various indices */ uint16_t index; /*! Variable sized. Destinations and sources go here */ GenRegister regs[0]; private: /*! Just Selection class can create SelectionInstruction */ SelectionInstruction(SelectionOpcode, uint32_t dstNum, uint32_t srcNum); // Allocates (with a linear allocator) and owns SelectionInstruction friend class Selection; }; /*! Instructions like sends require to make registers contiguous in GRF */ class SelectionVector : public NonCopyable, public intrusive_list_node { public: SelectionVector(void); /*! The instruction that requires the vector of registers */ SelectionInstruction *insn; /*! Directly points to the selection instruction registers */ GenRegister *reg; /*! Number of registers in the vector */ uint16_t regNum; /*! Indicate if this a destination or a source vector */ uint16_t isSrc; }; // Owns the selection block class Selection; /*! A selection block is the counterpart of the IR Basic block. 
It contains * the instructions generated from an IR basic block */ class SelectionBlock : public NonCopyable, public intrusive_list_node { public: SelectionBlock(const ir::BasicBlock *bb); /*! All the emitted instructions in the block */ intrusive_list insnList; /*! The vectors that may be required by some instructions of the block */ intrusive_list vectorList; /*! Extra registers needed by the block (only live in the block) */ gbe::vector tmp; /*! Associated IR basic block */ const ir::BasicBlock *bb; /*! Append a new temporary register */ void append(ir::Register reg); /*! Append a new selection vector in the block */ void append(SelectionVector *vec); /*! Append a new selection instruction at the end of the block */ void append(SelectionInstruction *insn); /*! Append a new selection instruction at the beginning of the block */ void prepend(SelectionInstruction *insn); }; /*! Owns the selection engine */ class GenContext; /*! Selection engine produces the pre-ISA instruction blocks */ class Selection { public: /*! Initialize internal structures used for the selection */ Selection(GenContext &ctx); /*! Release everything */ ~Selection(void); /*! Implements the instruction selection itself */ void select(void); /*! Bool and scalar register use scalar physical registers */ bool isScalarOrBool(ir::Register reg) const; /*! Get the number of instructions of the largest block */ uint32_t getLargestBlockSize(void) const; /*! Number of register vectors in the selection */ uint32_t getVectorNum(void) const; /*! Number of registers (temporaries are created during selection) */ uint32_t getRegNum(void) const; /*! Get the family for the given register */ ir::RegisterFamily getRegisterFamily(ir::Register reg) const; /*! Get the data for the given register */ ir::RegisterData getRegisterData(ir::Register reg) const; /*! Replace a source by the returned temporary register */ ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID); /*! 
Replace a destination to the returned temporary register */ ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID); /*! spill a register (insert spill/unspill instructions) */ void spillReg(ir::Register reg, uint32_t registerPool); /*! Create a new selection instruction */ SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum); /*! List of emitted blocks */ intrusive_list *blockList; /*! Actual implementation of the register allocator (use Pimpl) */ class Opaque; /*! Created and destroyed in cpp */ Opaque *opaque; /*! Use custom allocators */ GBE_CLASS(Selection); }; } /* namespace gbe */ #endif /* __GEN_INSN_SELECTION_HPP__ */ Release_v0.3/backend/src/backend/gen_insn_selection.hxx000066400000000000000000000072301223142177000233500ustar00rootroot00000000000000DECL_SELECTION_IR(LABEL, LabelInstruction) DECL_SELECTION_IR(MOV, UnaryInstruction) DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction) DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction) DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction) DECL_SELECTION_IR(NOT, UnaryInstruction) DECL_SELECTION_IR(LZD, UnaryInstruction) DECL_SELECTION_IR(RNDZ, UnaryInstruction) DECL_SELECTION_IR(RNDE, UnaryInstruction) DECL_SELECTION_IR(RNDD, UnaryInstruction) DECL_SELECTION_IR(RNDU, UnaryInstruction) DECL_SELECTION_IR(FRC, UnaryInstruction) DECL_SELECTION_IR(SEL, BinaryInstruction) DECL_SELECTION_IR(SEL_INT64, BinaryInstruction) DECL_SELECTION_IR(AND, BinaryInstruction) DECL_SELECTION_IR(OR, BinaryInstruction) DECL_SELECTION_IR(XOR, BinaryInstruction) DECL_SELECTION_IR(I64AND, BinaryInstruction) DECL_SELECTION_IR(I64OR, BinaryInstruction) DECL_SELECTION_IR(I64XOR, BinaryInstruction) DECL_SELECTION_IR(SHR, BinaryInstruction) DECL_SELECTION_IR(SHL, BinaryInstruction) DECL_SELECTION_IR(RSR, BinaryInstruction) DECL_SELECTION_IR(RSL, BinaryInstruction) DECL_SELECTION_IR(ASR, BinaryInstruction) DECL_SELECTION_IR(I64SHR, I64ShiftInstruction) DECL_SELECTION_IR(I64SHL, I64ShiftInstruction) 
DECL_SELECTION_IR(I64ASR, I64ShiftInstruction) DECL_SELECTION_IR(ADD, BinaryInstruction) DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction) DECL_SELECTION_IR(I64SATADD, I64SATADDInstruction) DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction) DECL_SELECTION_IR(I64SATSUB, I64SATSUBInstruction) DECL_SELECTION_IR(MUL, BinaryInstruction) DECL_SELECTION_IR(I64MUL, I64MULInstruction) DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction) DECL_SELECTION_IR(I64REM, I64DIVREMInstruction) DECL_SELECTION_IR(ATOMIC, AtomicInstruction) DECL_SELECTION_IR(MACH, BinaryInstruction) DECL_SELECTION_IR(CMP, CompareInstruction) DECL_SELECTION_IR(I64CMP, I64CompareInstruction) DECL_SELECTION_IR(SEL_CMP, CompareInstruction) DECL_SELECTION_IR(MAD, TernaryInstruction) DECL_SELECTION_IR(JMPI, JumpInstruction) DECL_SELECTION_IR(EOT, EotInstruction) DECL_SELECTION_IR(INDIRECT_MOVE, IndirectMoveInstruction) DECL_SELECTION_IR(NOP, NoOpInstruction) DECL_SELECTION_IR(WAIT, WaitInstruction) DECL_SELECTION_IR(MATH, MathInstruction) DECL_SELECTION_IR(BARRIER, BarrierInstruction) DECL_SELECTION_IR(FENCE, FenceInstruction) DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction) DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction) DECL_SELECTION_IR(READ64, Read64Instruction) DECL_SELECTION_IR(WRITE64, Write64Instruction) DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction) DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction) DECL_SELECTION_IR(DWORD_GATHER, DWordGatherInstruction) DECL_SELECTION_IR(SAMPLE, SampleInstruction) DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction) DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction) DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction) DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction) DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction) DECL_SELECTION_IR(FBH, UnaryInstruction) DECL_SELECTION_IR(FBL, UnaryInstruction) DECL_SELECTION_IR(HADD, BinaryWithTempInstruction) DECL_SELECTION_IR(RHADD, 
BinaryWithTempInstruction) DECL_SELECTION_IR(I64HADD, I64HADDInstruction) DECL_SELECTION_IR(I64RHADD, I64RHADDInstruction) DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) DECL_SELECTION_IR(CONVF_TO_I64, UnaryWithTempInstruction) DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction) DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction) Release_v0.3/backend/src/backend/gen_program.cpp000066400000000000000000000121421223142177000217540ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file program.cpp * \author Benjamin Segovia */ #include "backend/program.h" #include "backend/gen_program.h" #include "backend/gen_program.hpp" #include "backend/gen_context.hpp" #include "backend/gen_defs.hpp" #include "backend/gen/gen_mesa_disasm.h" #include "backend/gen_reg_allocation.hpp" #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" #include #include #include #include #include namespace gbe { GenKernel::GenKernel(const std::string &name) : Kernel(name), insns(NULL), insnNum(0) {} GenKernel::~GenKernel(void) { GBE_SAFE_DELETE_ARRAY(insns); } const char *GenKernel::getCode(void) const { return (const char*) insns; } const void GenKernel::setCode(const char * ins, size_t size) { insns = (GenInstruction *)ins; insnNum = size / sizeof(GenInstruction); } size_t GenKernel::getCodeSize(void) const { return insnNum * sizeof(GenInstruction); } void GenKernel::printStatus(int indent, std::ostream& outs) { Kernel::printStatus(indent, outs); FILE *f = fopen("/dev/null", "w"); char *buf = new char[4096]; setbuffer(f, buf, 4096); for (uint32_t i = 0; i < insnNum; i++) { gen_disasm(f, insns+i); outs << buf; fflush(f); setbuffer(f, NULL, 0); setbuffer(f, buf, 4096); } setbuffer(f, NULL, 0); delete [] buf; fclose(f); } GenProgram::GenProgram(void) {} GenProgram::~GenProgram(void) {} /*! We must avoid spilling at all cost with Gen */ static const struct CodeGenStrategy { uint32_t simdWidth; bool limitRegisterPressure; } codeGenStrategy[] = { {16,false}, {16,true}, {8,false}, {8,true}, }; Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name) { // Be careful when the simdWidth is forced by the programmer. We can see it // when the function already provides the simd width we need to use (i.e. // non zero) const ir::Function *fn = unit.getFunction(name); const uint32_t codeGenNum = fn->getSimdWidth() != 0 ? 2 : 4; uint32_t codeGen = fn->getSimdWidth() == 8 ? 
2 : 0; Kernel *kernel = NULL; // Stop when compilation is successful for (; codeGen < codeGenNum; ++codeGen) { const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth; const bool limitRegisterPressure = codeGenStrategy[codeGen].limitRegisterPressure; // Force the SIMD width now and try to compile unit.getFunction(name)->setSimdWidth(simdWidth); Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure); kernel = ctx->compileKernel(); if (kernel != NULL) { break; } GBE_DELETE(ctx); } // XXX spill must be implemented GBE_ASSERTM(kernel != NULL, "Register spilling not supported yet!"); return kernel; } static gbe_program genProgramNewFromBinary(const char *binary, size_t size) { using namespace gbe; std::string binary_content; binary_content.assign(binary, size); GenProgram *program = GBE_NEW_NO_ARG(GenProgram); std::istringstream ifs(binary_content, std::ostringstream::binary); if (!program->deserializeFromBin(ifs)) { delete program; return NULL; } //program->printStatus(0, std::cout); return reinterpret_cast(program); } static gbe_program genProgramNewFromLLVM(const char *fileName, size_t stringSize, char *err, size_t *errSize) { using namespace gbe; GenProgram *program = GBE_NEW_NO_ARG(GenProgram); std::string error; // Try to compile the program if (program->buildFromLLVMFile(fileName, error) == false) { if (err != NULL && errSize != NULL && stringSize > 0u) { const size_t msgSize = std::min(error.size(), stringSize-1u); std::memcpy(err, error.c_str(), msgSize); *errSize = error.size(); } GBE_DELETE(program); return NULL; } // Everything run fine return (gbe_program) program; } } /* namespace gbe */ void genSetupCallBacks(void) { gbe_program_new_from_binary = gbe::genProgramNewFromBinary; gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM; } sem_t llvm_semaphore; void genSetupLLVMSemaphore(void) { sem_init(&llvm_semaphore, 0, 1); } 
Release_v0.3/backend/src/backend/gen_program.h000066400000000000000000000023321223142177000214210ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file program.h * \author Benjamin Segovia * * C-like interface for the gen kernels and programs */ #ifndef __GBE_GEN_PROGRAM_H__ #define __GBE_GEN_PROGRAM_H__ #include #include #include /*! This will make the compiler output Gen ISA code */ extern void genSetupCallBacks(void); extern sem_t llvm_semaphore; extern void genSetupLLVMSemaphore(void); #endif /* __GBE_GEN_PROGRAM_H__ */ Release_v0.3/backend/src/backend/gen_program.hpp000066400000000000000000000045561223142177000217730ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. 
If not, see . * * Author: Benjamin Segovia */ /** * \file program.hpp * \author Benjamin Segovia */ #ifndef __GBE_GEN_PROGRAM_HPP__ #define __GBE_GEN_PROGRAM_HPP__ #include "backend/program.h" #include "backend/program.hpp" // Gen ISA instruction struct GenInstruction; namespace gbe { /*! Describe a compiled kernel */ class GenKernel : public Kernel { public: /*! Create an empty kernel with the given name */ GenKernel(const std::string &name); /*! Destroy it */ virtual ~GenKernel(void); /*! Implements base class */ virtual const char *getCode(void) const; /*! Set the instruction stream (to be implemented) */ virtual const void setCode(const char *, size_t size); /*! Implements get the code size */ virtual size_t getCodeSize(void) const; /*! Implements printStatus*/ virtual void printStatus(int indent, std::ostream& outs); GenInstruction *insns; //!< Instruction stream uint32_t insnNum; //!< Number of instructions GBE_CLASS(GenKernel); //!< Use custom allocators }; /*! Describe a compiled program */ class GenProgram : public Program { public: /*! Create an empty program */ GenProgram(void); /*! Destroy the program */ virtual ~GenProgram(void); /*! Implements base class */ virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name); /*! Allocate an empty kernel. */ virtual Kernel *allocateKernel(const std::string &name) { return GBE_NEW(GenKernel, name); } /*! Use custom allocators */ GBE_CLASS(GenProgram); }; } /* namespace gbe */ #endif /* __GBE_GEN_PROGRAM_HPP__ */ Release_v0.3/backend/src/backend/gen_reg_allocation.cpp000066400000000000000000000702151223142177000232740ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_reg_allocation.cpp * \author Benjamin Segovia */ #include "ir/profile.hpp" #include "ir/function.hpp" #include "backend/gen_insn_selection.hpp" #include "backend/gen_reg_allocation.hpp" #include "backend/gen_register.hpp" #include "backend/program.hpp" #include "sys/exception.hpp" #include #include #define RESERVED_REG_NUM_FOR_SPILL 6 namespace gbe { ///////////////////////////////////////////////////////////////////////////// // Register allocator internal implementation ///////////////////////////////////////////////////////////////////////////// /*! Provides the location of a register in a vector */ typedef std::pair VectorLocation; /*! Implements the register allocation */ class GenRegAllocator::Opaque { public: /*! Initialize the register allocator */ Opaque(GenContext &ctx); /*! Release all taken resources */ ~Opaque(void); /*! Perform the register allocation. Return true if success */ bool allocate(Selection &selection); /*! Return the Gen register from the selection register */ GenRegister genReg(const GenRegister ®); /*! Output the register allocation */ void outputAllocation(void); private: /*! Expire one GRF interval. Return true if one was successfully expired */ bool expireGRF(const GenRegInterval &limit); /*! Expire a flag register. Return true if one was successfully expired */ bool expireFlag(const GenRegInterval &limit); /*! Allocate the virtual boolean (== flags) registers */ void allocateFlags(Selection &selection); /*! Allocate the GRF registers */ bool allocateGRFs(Selection &selection); /*! 
Create gen registers for all preallocated curbe registers. */ void allocatePayloadRegs(void); /*! Create a Gen register from a register set in the payload */ void allocatePayloadReg(ir::Register, uint32_t offset, uint32_t subOffset = 0); /*! Create the intervals for each register */ /*! Allocate the vectors detected in the instruction selection pass */ void allocateVector(Selection &selection); /*! Allocate the given interval. Return true if success */ bool createGenReg(const GenRegInterval &interval); /*! Indicate if the registers are already allocated in vectors */ bool isAllocated(const SelectionVector *vector) const; /*! Reallocate registers if needed to make the registers in the vector * contigous in memory */ void coalesce(Selection &selection, SelectionVector *vector); /*! The context owns the register allocator */ GenContext &ctx; /*! Map virtual registers to offset in the (physical) register file */ map RA; /*! Provides the position of each register in a vector */ map vectorMap; /*! All vectors used in the selection */ vector vectors; /*! The set of booleans that will go to GRF (cannot be kept into flags) */ set grfBooleans; /*! All the register intervals */ vector intervals; /*! Intervals sorting based on starting point positions */ vector starting; /*! Intervals sorting based on ending point positions */ vector ending; /*! registers that are spilled */ set spilled; /* reserved registers for register spill/reload */ uint32_t reservedReg; /*! Current vector to expire */ uint32_t expiringID; /*! Use custom allocator */ GBE_CLASS(Opaque); }; // Note that byte vector registers use two bytes per byte (and can be // interleaved) static const size_t familyVectorSize[] = {2,2,2,4,8}; static const size_t familyScalarSize[] = {2,1,2,4,8}; /*! Interval as used in linear scan allocator. 
Basically, stores the first and * the last instruction where the register is alive */ struct GenRegInterval { INLINE GenRegInterval(ir::Register reg) : reg(reg), minID(INT_MAX), maxID(-INT_MAX) {} ir::Register reg; //!< (virtual) register of the interval int32_t minID, maxID; //!< Starting and ending points }; GenRegAllocator::Opaque::Opaque(GenContext &ctx) : ctx(ctx) {} GenRegAllocator::Opaque::~Opaque(void) {} void GenRegAllocator::Opaque::allocatePayloadReg(ir::Register reg, uint32_t offset, uint32_t subOffset) { using namespace ir; assert(offset >= GEN_REG_SIZE); offset += subOffset; RA.insert(std::make_pair(reg, offset)); GBE_ASSERT(reg != ocl::blockip || (offset % GEN_REG_SIZE == 0)); this->intervals[reg].minID = 0; this->intervals[reg].maxID = 0; } INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) { using namespace ir; for(auto &it : this->ctx.curbeRegs) if (it.first.value() < 0x8000) allocatePayloadReg(it.first, it.second); // Allocate all pushed registers (i.e. structure kernel arguments) const Function &fn = ctx.getFunction(); GBE_ASSERT(fn.getProfile() == PROFILE_OCL); const Function::PushMap &pushMap = fn.getPushMap(); for (auto rit = pushMap.rbegin(); rit != pushMap.rend(); ++rit) { const uint32_t argID = rit->second.argID; const FunctionArgument arg = fn.getArg(argID); const uint32_t subOffset = rit->second.offset; const Register reg = rit->second.getRegister(); auto it = this->ctx.curbeRegs.find(arg.reg); assert(it != ctx.curbeRegs.end()); allocatePayloadReg(reg, it->second, subOffset); ctx.splitBlock(it->second, subOffset); } } bool GenRegAllocator::Opaque::createGenReg(const GenRegInterval &interval) { using namespace ir; const ir::Register reg = interval.reg; const uint32_t simdWidth = ctx.getSimdWidth(); if (RA.contains(reg) == true) return true; // already allocated GBE_ASSERT(ctx.isScalarReg(reg) == false); const bool isScalar = ctx.sel->isScalarOrBool(reg); const RegisterData regData = ctx.sel->getRegisterData(reg); const 
RegisterFamily family = regData.family; const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family]; const uint32_t regSize = isScalar ? typeSize : simdWidth*typeSize; uint32_t grfOffset; while ((grfOffset = ctx.allocate(regSize, regSize)) == 0) { const bool success = this->expireGRF(interval); if (UNLIKELY(success == false)) return false; } GBE_ASSERTM(grfOffset != 0, "Unable to register allocate"); RA.insert(std::make_pair(reg, grfOffset)); return true; } bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) const { const ir::Register first = vector->reg[0].reg(); const auto it = vectorMap.find(first); // If the first register is not allocated we are done if (it == vectorMap.end()) return false; // If there are more left registers than in the found vector, there are // still registers to allocate const SelectionVector *other = it->second.first; const uint32_t otherFirst = it->second.second; const uint32_t leftNum = other->regNum - otherFirst; if (leftNum < vector->regNum) return false; // Now check that all the registers in the already allocated vector match // the current vector for (uint32_t regID = 1; regID < vector->regNum; ++regID) { const ir::Register from = vector->reg[regID].reg(); const ir::Register to = other->reg[regID + otherFirst].reg(); if (from != to) return false; } return true; } void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector *vector) { for (uint32_t regID = 0; regID < vector->regNum; ++regID) { const ir::Register reg = vector->reg[regID].reg(); const auto it = this->vectorMap.find(reg); // case 1: the register is not already in a vector, so it can stay in this // vector. 
Note that local IDs are *non-scalar* special registers but will // require a MOV anyway since pre-allocated in the CURBE if (it == vectorMap.end() && ctx.sel->isScalarOrBool(reg) == false && ctx.isSpecialReg(reg) == false) { const VectorLocation location = std::make_pair(vector, regID); this->vectorMap.insert(std::make_pair(reg, location)); } // case 2: the register is already in another vector, so we need to move // it to a temporary register. // TODO: we can do better than that if we analyze the liveness of the // already allocated registers in the vector. If there is no inteference // and the order is maintained, we can reuse the previous vector and avoid // the MOVs else { ir::Register tmp; if (vector->isSrc) tmp = selection.replaceSrc(vector->insn, regID); else tmp = selection.replaceDst(vector->insn, regID); const VectorLocation location = std::make_pair(vector, regID); this->vectorMap.insert(std::make_pair(tmp, location)); } } } /*! Will sort vector in decreasing order */ inline bool cmp(const SelectionVector *v0, const SelectionVector *v1) { return v0->regNum > v1->regNum; } void GenRegAllocator::Opaque::allocateVector(Selection &selection) { const uint32_t vectorNum = selection.getVectorNum(); this->vectors.resize(vectorNum); // First we find and store all vectors uint32_t vectorID = 0; for (auto &block : *selection.blockList) for (auto &v : block.vectorList) this->vectors[vectorID++] = &v; GBE_ASSERT(vectorID == vectorNum); // Heuristic (really simple...): sort them by the number of registers they // contain std::sort(this->vectors.begin(), this->vectors.end(), cmp); // Insert MOVs when this is required for (vectorID = 0; vectorID < vectorNum; ++vectorID) { SelectionVector *vector = this->vectors[vectorID]; if (this->isAllocated(vector)) continue; this->coalesce(selection, vector); } } template inline bool cmp(const GenRegInterval *i0, const GenRegInterval *i1) { return sortStartingPoint ? 
i0->minID < i1->minID : i0->maxID < i1->maxID; } bool GenRegAllocator::Opaque::expireGRF(const GenRegInterval &limit) { while (this->expiringID != ending.size()) { const GenRegInterval *toExpire = this->ending[this->expiringID]; const ir::Register reg = toExpire->reg; // Dead code produced by the insn selection -> we skip it if (toExpire->minID > toExpire->maxID) { this->expiringID++; continue; } //ignore register that already spilled if(spilled.contains(reg)) { this->expiringID++; continue; } // Ignore booleans that were allocated with flags // if (ctx.getRegisterFamily(reg) == ir::FAMILY_BOOL && !grfBooleans.contains(reg)) { if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL) { this->expiringID++; continue; } if (toExpire->maxID >= limit.minID) return false; auto it = RA.find(reg); GBE_ASSERT(it != RA.end()); // offset less than 32 means it is not managed by our reg allocator. if (it->second < 32) { this->expiringID++; continue; } // Case 1 - it does not belong to a vector. Just remove it ctx.deallocate(it->second); this->expiringID++; return true; } // We were not able to expire anything return false; } void GenRegAllocator::Opaque::allocateFlags(Selection &selection) { // Store the registers allocated in the map map allocatedFlags; GenRegInterval spill = ir::Register(ir::RegisterFile::MAX_INDEX); // we have two flags we use for booleans f1.0 and f1.1 const uint32_t flagNum = 2; uint32_t freeFlags[] = {0,1}; uint32_t freeNum = flagNum; // Perform the linear scan allocator on the flag registers only. We only use // two flags registers for the booleans right now: f1.0 and f1.1 const uint32_t regNum = ctx.sel->getRegNum(); uint32_t endID = 0; // interval to expire for (uint32_t startID = 0; startID < regNum; ++startID) { const GenRegInterval &interval = *this->starting[startID]; const ir::Register reg = interval.reg; if (ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL) continue; // Not a flag. 
We don't care if (grfBooleans.contains(reg)) continue; // Cannot use a flag register if (interval.maxID == -INT_MAX) continue; // Unused register if (freeNum != 0) { spill = interval; allocatedFlags.insert(std::make_pair(reg, freeFlags[--freeNum])); } else { // Try to expire one register while (endID != ending.size()) { const GenRegInterval *toExpire = this->ending[endID]; const ir::Register reg = toExpire->reg; // Dead code produced by the insn selection -> we skip it if (toExpire->minID > toExpire->maxID) { endID++; continue; } // We cannot expire this interval and the next ones if (toExpire->maxID >= interval.minID) break; // Must be a boolean allocated with a flag register if (ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL || grfBooleans.contains(reg)) { endID++; continue; } // We reuse a flag from a previous interval (the oldest one) auto it = allocatedFlags.find(toExpire->reg); GBE_ASSERT(it != allocatedFlags.end()); freeFlags[freeNum++] = it->second; endID++; break; } // We need to spill one of the previous boolean values if (freeNum == 0) { GBE_ASSERT(uint16_t(spill.reg) != ir::RegisterFile::MAX_INDEX); // We spill the last inserted boolean and use its flag instead for // this one if (spill.maxID > interval.maxID) { auto it = allocatedFlags.find(spill.reg); GBE_ASSERT(it != allocatedFlags.end()); allocatedFlags.insert(std::make_pair(reg, it->second)); allocatedFlags.erase(spill.reg); grfBooleans.insert(spill.reg); spill = interval; } // We will a grf for the current register else grfBooleans.insert(reg); } else allocatedFlags.insert(std::make_pair(reg, freeFlags[--freeNum])); } } // Now, we traverse all the selection instructions and we patch them to make // them use flag registers for (auto &block : *selection.blockList) for (auto &insn : block.insnList) { const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum; // Patch the source booleans for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const GenRegister selReg = insn.src(srcID); const 
ir::Register reg = selReg.reg(); if (selReg.physical || ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL) continue; auto it = allocatedFlags.find(reg); if (it == allocatedFlags.end()) continue; // Use a flag register for it now insn.src(srcID) = GenRegister::flag(1,it->second); } // Patch the destination booleans for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const GenRegister selReg = insn.dst(dstID); const ir::Register reg = selReg.reg(); if (selReg.physical || ctx.sel->getRegisterFamily(reg) != ir::FAMILY_BOOL) continue; auto it = allocatedFlags.find(reg); if (it == allocatedFlags.end()) continue; // Use a flag register for it now insn.dst(dstID) = GenRegister::flag(1,it->second); } // Patch the predicate now. Note that only compares actually modify it (it // is called a "conditional modifier"). The other instructions just read // it if (insn.state.physicalFlag == 0) { auto it = allocatedFlags.find(ir::Register(insn.state.flagIndex)); // Just patch it if we can use a flag directly if (it != allocatedFlags.end()) { insn.state.flag = 1; insn.state.subFlag = it->second; insn.state.physicalFlag = 1; } // When we let the boolean in a GRF, use f0.1 as a temporary else { // Mov the GRF to the flag such that the flag can be read SelectionInstruction *mov0 = selection.create(SEL_OP_MOV,1,1); mov0->state = GenInstructionState(1); mov0->state.predicate = GEN_PREDICATE_NONE; mov0->state.noMask = 1; mov0->src(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex)); mov0->dst(0) = GenRegister::flag(0,1); // Do not prepend if the flag is not read (== used only as a // conditional modifier) if (insn.state.predicate != GEN_PREDICATE_NONE) insn.prepend(*mov0); // We can use f0.1 (our "backdoor" flag) insn.state.flag = 0; insn.state.subFlag = 1; insn.state.physicalFlag = 1; // Compare instructions update the flags so we must copy it back to // the GRF if (insn.opcode == SEL_OP_CMP || insn.opcode == SEL_OP_I64CMP) { SelectionInstruction *mov1 = 
selection.create(SEL_OP_MOV,1,1); mov1->state = mov0->state; mov1->dst(0) = mov0->src(0); mov1->src(0) = mov0->dst(0); insn.append(*mov1); } } } } } bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) { // Perform the linear scan allocator const uint32_t regNum = ctx.sel->getRegNum(); for (uint32_t startID = 0; startID < regNum; ++startID) { const GenRegInterval &interval = *this->starting[startID]; const ir::Register reg = interval.reg; if (interval.maxID == -INT_MAX) continue; // Unused register if (RA.contains(reg)) continue; // already allocated // Case 1: the register belongs to a vector, allocate all the registers in // one piece auto it = vectorMap.find(reg); if (it != vectorMap.end()) { const SelectionVector *vector = it->second.first; // all the reg in the SelectionVector are spilled if(spilled.contains(vector->reg[0].reg())) continue; const uint32_t simdWidth = ctx.getSimdWidth(); const ir::RegisterData regData = ctx.sel->getRegisterData(reg); const ir::RegisterFamily family = regData.family; const uint32_t typeSize = familyVectorSize[family]; const uint32_t alignment = simdWidth*typeSize; const uint32_t size = vector->regNum * alignment; uint32_t grfOffset; while ((grfOffset = ctx.allocate(size, alignment)) == 0) { const bool success = this->expireGRF(interval); if (success == false) { // if no spill support, just return false, else simply spill the register if(reservedReg == 0) return false; break; } } if(grfOffset == 0) { // spill all the registers in the SelectionVector // the tricky here is I need to use reservedReg+1 as scratch write payload. // so, i need to write the first register to scratch memory first. // the spillReg() will just append scratch write insn after the def. 
To spill // the first register, need to call spillReg() last for the vector->reg[0] GBE_ASSERT(vector->regNum < RESERVED_REG_NUM_FOR_SPILL); for(int i = vector->regNum-1; i >= 0; i--) { spilled.insert(vector->reg[i].reg()); selection.spillReg(vector->reg[i].reg(), reservedReg); } continue; } for (uint32_t regID = 0; regID < vector->regNum; ++regID) { const ir::Register reg = vector->reg[regID].reg(); GBE_ASSERT(RA.contains(reg) == false && ctx.sel->getRegisterData(reg).family == family); RA.insert(std::make_pair(reg, grfOffset + alignment * regID)); ctx.splitBlock(grfOffset, alignment * regID); //splitBlock will not split if regID == 0 } } // Case 2: This is a regular scalar register, allocate it alone else if (this->createGenReg(interval) == false) { if(reservedReg == 0) return false; spilled.insert(reg); selection.spillReg(reg, reservedReg); } } return true; } INLINE bool GenRegAllocator::Opaque::allocate(Selection &selection) { using namespace ir; if (ctx.getSimdWidth() == 8) { reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE); reservedReg /= GEN_REG_SIZE; } else { reservedReg = 0; } // Allocate all the vectors first since they need to be contiguous this->allocateVector(selection); // schedulePreRegAllocation(ctx, selection); // Now start the linear scan allocation for (uint32_t regID = 0; regID < ctx.sel->getRegNum(); ++regID) this->intervals.push_back(ir::Register(regID)); // Allocate the special registers (only those which are actually used) this->allocatePayloadRegs(); // Group and barrier IDs are always allocated by the hardware in r0 RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1 RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6 RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7 RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2 // block IP used to handle the mask in SW is always allocated // Compute the intervals int32_t insnID = 0; for (auto &block : 
*selection.blockList) { int32_t lastID = insnID; // Update the intervals of each used register. Note that we do not // register allocate R0, so we skip all sub-registers in r0 for (auto &insn : block.insnList) { const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum; for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const GenRegister &selReg = insn.src(srcID); const ir::Register reg = selReg.reg(); if (selReg.file != GEN_GENERAL_REGISTER_FILE || reg == ir::ocl::barrierid || reg == ir::ocl::groupid0 || reg == ir::ocl::groupid1 || reg == ir::ocl::groupid2) continue; this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID); this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID); } for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const GenRegister &selReg = insn.dst(dstID); const ir::Register reg = selReg.reg(); if (selReg.file != GEN_GENERAL_REGISTER_FILE || reg == ir::ocl::barrierid || reg == ir::ocl::groupid0 || reg == ir::ocl::groupid1 || reg == ir::ocl::groupid2) continue; this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID); this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID); } // Flag registers can only go to src[0] const SelectionOpcode opcode = SelectionOpcode(insn.opcode); if (opcode == SEL_OP_AND || opcode == SEL_OP_OR || opcode == SEL_OP_XOR || opcode == SEL_OP_I64AND || opcode == SEL_OP_I64OR || opcode == SEL_OP_I64XOR) { if (insn.src(1).physical == 0) { const ir::Register reg = insn.src(1).reg(); if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL) grfBooleans.insert(reg); } } // OK, a flag is used as a predicate or a conditional modifier if (insn.state.physicalFlag == 0) { const ir::Register reg = ir::Register(insn.state.flagIndex); this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID); this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID); } lastID = insnID; insnID++; } // All registers alive at the end of the block must 
have their intervals // updated as well const ir::BasicBlock *bb = block.bb; const ir::Liveness::LiveOut &liveOut = ctx.getLiveOut(bb); for (auto reg : liveOut) { this->intervals[reg].minID = std::min(this->intervals[reg].minID, lastID); this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID); } } // Sort both intervals in starting point and ending point increasing orders const uint32_t regNum = ctx.sel->getRegNum(); this->starting.resize(regNum); this->ending.resize(regNum); for (uint32_t regID = 0; regID < regNum; ++regID) this->starting[regID] = this->ending[regID] = &intervals[regID]; std::sort(this->starting.begin(), this->starting.end(), cmp); std::sort(this->ending.begin(), this->ending.end(), cmp); // Remove the registers that were not allocated this->expiringID = 0; while (this->expiringID < regNum) { const GenRegInterval *interval = ending[this->expiringID]; if (interval->maxID == -INT_MAX) this->expiringID++; else break; } // First we try to put all booleans registers into flags this->allocateFlags(selection); // Allocate all the GRFs now (regular register and boolean that are not in // flag registers) return this->allocateGRFs(selection); } INLINE void GenRegAllocator::Opaque::outputAllocation(void) { std::cout << "## register allocation ##" << std::endl; for(auto &i : RA) { int vReg = (int)i.first; int offst = (int)i.second / sizeof(float); int reg = offst / 8; int subreg = offst % 8; std::cout << "%" << vReg << " g" << reg << "." 
<< subreg << "D" << std::endl; } std::set::iterator is; std::cout << "## spilled registers:" << std::endl; for(is = spilled.begin(); is != spilled.end(); is++) std::cout << (int)*is << std::endl; std::cout << std::endl; } INLINE GenRegister setGenReg(const GenRegister &src, uint32_t grfOffset) { GenRegister dst; dst = src; dst.physical = 1; dst.nr = grfOffset / GEN_REG_SIZE; dst.subnr = grfOffset % GEN_REG_SIZE; return dst; } INLINE GenRegister GenRegAllocator::Opaque::genReg(const GenRegister ®) { if (reg.file == GEN_GENERAL_REGISTER_FILE) { if(reg.physical == 1) { return reg; } GBE_ASSERT(RA.contains(reg.reg()) != false); const uint32_t grfOffset = RA.find(reg.reg())->second; const uint32_t suboffset = reg.subphysical ? reg.subnr : 0; const GenRegister dst = setGenReg(reg, grfOffset + suboffset); if (reg.quarter != 0) return GenRegister::Qn(dst, reg.quarter); else return dst; } else return reg; } ///////////////////////////////////////////////////////////////////////////// // Register allocator public implementation ///////////////////////////////////////////////////////////////////////////// GenRegAllocator::GenRegAllocator(GenContext &ctx) { this->opaque = GBE_NEW(GenRegAllocator::Opaque, ctx); } GenRegAllocator::~GenRegAllocator(void) { GBE_DELETE(this->opaque); } bool GenRegAllocator::allocate(Selection &selection) { return this->opaque->allocate(selection); } GenRegister GenRegAllocator::genReg(const GenRegister ®) { return this->opaque->genReg(reg); } void GenRegAllocator::outputAllocation(void) { this->opaque->outputAllocation(); } } /* namespace gbe */ Release_v0.3/backend/src/backend/gen_reg_allocation.hpp000066400000000000000000000040451223142177000232770ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your 
option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file gen_reg_allocation.hpp * \author Benjamin Segovia */ #ifndef __GBE_GEN_REG_ALLOCATION_HPP__ #define __GBE_GEN_REG_ALLOCATION_HPP__ #include "ir/register.hpp" #include "backend/gen_register.hpp" namespace gbe { class Selection; // Pre-register allocation code generation class GenRegister; // Pre-register allocation Gen register struct GenRegInterval; // Liveness interval for each register class GenContext; // Gen specific context /*! Register allocate (i.e. virtual to physical register mapping) */ class GenRegAllocator { public: /*! Initialize the register allocator */ GenRegAllocator(GenContext &ctx); /*! Release all taken resources */ ~GenRegAllocator(void); /*! Perform the register allocation */ bool allocate(Selection &selection); /*! Virtual to physical translation */ GenRegister genReg(const GenRegister ®); /*! Output the register allocation */ void outputAllocation(void); private: /*! Actual implementation of the register allocator (use Pimpl) */ class Opaque; /*! Created and destroyed in cpp */ Opaque *opaque; /*! 
Use custom allocator */ GBE_CLASS(GenRegAllocator); }; } /* namespace gbe */ #endif /* __GBE_GEN_REG_ALLOCATION_HPP__ */ Release_v0.3/backend/src/backend/gen_register.hpp000066400000000000000000000765371223142177000221600ustar00rootroot00000000000000/* * Copyright 2012 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ /* Copyright (C) Intel Corp. 2006. All Rights Reserved. Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to develop this 3D driver. 
Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell
  */
/**
 * \file gen_register.hpp
 * \author Benjamin Segovia
 */

#ifndef __GEN_REGISTER_HPP__
#define __GEN_REGISTER_HPP__

#include "backend/gen_defs.hpp"
#include "ir/register.hpp"
#include "sys/platform.hpp"

namespace gbe
{

  /*! Type size in bytes for each Gen type. Unknown types trip the assert and
   *  return 0 when assertions are compiled out
   */
  INLINE int typeSize(uint32_t type) {
    switch(type) {
      case GEN_TYPE_DF:
      case GEN_TYPE_UL:
      case GEN_TYPE_L:
        return 8;
      case GEN_TYPE_UD:
      case GEN_TYPE_D:
      case GEN_TYPE_F:
        return 4;
      case GEN_TYPE_UW:
      case GEN_TYPE_W:
        return 2;
      case GEN_TYPE_UB:
      case GEN_TYPE_B:
        return 1;
      default:
        assert(0);
        return 0;
    }
  }

  /*! Convert a hstride to a number of element: encodings 0..5 map to
   *  0, 1, 2, 4, 8, 16. Any other encoding trips the assert and returns 0
   *  when assertions are compiled out
   */
  INLINE uint32_t stride(uint32_t stride) {
    switch (stride) {
      case 0: return 0;
      case 1: return 1;
      case 2: return 2;
      case 3: return 4;
      case 4: return 8;
      case 5: return 16;
      default: assert(0); return 0;
    }
  }

  /*! Encode the instruction state. Note that the flag register can be either
   *  physical (i.e. a real Gen flag) or a virtual boolean register. The flag
   *  register allocation will turn all virtual boolean registers into flag
   *  registers
   */
  class GenInstructionState
  {
  public:
    /*! Default state: Q1 quarter control, mask enabled, normal predication,
     *  no saturation, physical flag 0.0 (the fields are set one by one below)
     */
    INLINE GenInstructionState(uint32_t simdWidth = 8) {
      this->execWidth = simdWidth;
      this->quarterControl = GEN_COMPRESSION_Q1;
      this->nibControl = 0;
      this->accWrEnable = 0;
      this->noMask = 0;
      this->flag = 0;
      this->subFlag = 0;
      this->predicate = GEN_PREDICATE_NORMAL;
      this->inversePredicate = 0;
      this->physicalFlag = 1;
      this->flagIndex = 0;
      this->saturate = GEN_MATH_SATURATE_NONE;
    }
    uint32_t physicalFlag:1; //!< Physical or virtual flag register
    uint32_t flag:1;         //!< Only if physical flag
    uint32_t subFlag:1;      //!< Only if physical flag
    uint32_t flagIndex:16;   //!< Only if virtual flag (index of the register)
    uint32_t execWidth:5;
    uint32_t quarterControl:1;
    uint32_t nibControl:1;
    uint32_t accWrEnable:1;
    uint32_t noMask:1;
    uint32_t predicate:4;
    uint32_t inversePredicate:1;
    uint32_t saturate:1;
    /*! Select one of four nibbles: bit 1 of "nib" goes to quarterControl and
     *  bit 0 to nibControl. Values outside 0..3 hit NOT_IMPLEMENTED
     */
    void chooseNib(int nib) {
      switch (nib) {
        case 0:
          quarterControl = 0;
          nibControl = 0;
          break;
        case 1:
          quarterControl = 0;
          nibControl = 1;
          break;
        case 2:
          quarterControl = 1;
          nibControl = 0;
          break;
        case 3:
          quarterControl = 1;
          nibControl = 1;
          break;
        default:
          NOT_IMPLEMENTED;
      }
    }
    /*! Use the physical flag register f<nr>.<subnr> for this instruction */
    void useFlag(int nr, int subnr) {
      flag = nr;
      subFlag = subnr;
      physicalFlag = 1;
    }
  };

  /*! This is a book-keeping structure used to encode both virtual and physical
   *  registers
   */
  class GenRegister
  {
  public:
    /*! Empty constructor */
    INLINE GenRegister(void) {}

    /*! 
General constructor */ INLINE GenRegister(uint32_t file, ir::Register reg, uint32_t type, uint32_t vstride, uint32_t width, uint32_t hstride) { this->type = type; this->file = file; this->physical = 0; this->value.reg = reg; this->negation = 0; this->absolute = 0; this->vstride = vstride; this->width = width; this->hstride = hstride; this->quarter = 0; this->nr = this->subnr = 0; this->address_mode = GEN_ADDRESS_DIRECT; } /*! For specific physical registers only */ INLINE GenRegister(uint32_t file, uint32_t nr, uint32_t subnr, uint32_t type, uint32_t vstride, uint32_t width, uint32_t hstride) { this->type = type; this->file = file; this->nr = nr; this->physical = 1; this->subnr = subnr * typeSize(type); this->negation = 0; this->absolute = 0; this->vstride = vstride; this->width = width; this->hstride = hstride; this->quarter = 0; this->address_mode = GEN_ADDRESS_DIRECT; } /*! Return the IR virtual register */ INLINE ir::Register reg(void) const { return ir::Register(value.reg); } /*! For immediates or virtual register */ union { double df; float f; int32_t d; uint32_t ud; uint16_t reg; int64_t i64; } value; uint32_t nr:8; //!< Just for some physical registers (acc, null) uint32_t subnr:8; //!< Idem uint32_t physical:1; //!< 1 if physical, 0 otherwise uint32_t subphysical:1;//!< 1 if subnr is physical, 0 otherwise uint32_t type:4; //!< Gen type uint32_t file:2; //!< Register file uint32_t negation:1; //!< For source uint32_t absolute:1; //!< For source uint32_t vstride:4; //!< Vertical stride uint32_t width:3; //!< Width uint32_t hstride:2; //!< Horizontal stride uint32_t quarter:1; //!< To choose which part we want (Q1 / Q2) uint32_t address_mode:1; //!< direct or indirect static INLINE GenRegister offset(GenRegister reg, int nr, int subnr = 0) { GenRegister r = reg; r.nr += nr; r.subnr += subnr; return r; } INLINE bool isint64(void) const { if ((type == GEN_TYPE_UL || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE) return true; return false; } INLINE 
bool isimmdf(void) const { if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE) return true; return false; } INLINE GenRegister top_half(void) const { GenRegister r = bottom_half(); r.subnr += 4; return r; } INLINE GenRegister bottom_half(void) const { GBE_ASSERT(isint64()); GenRegister r = *this; r.type = type == GEN_TYPE_UL ? GEN_TYPE_UD : GEN_TYPE_D; r.hstride = GEN_HORIZONTAL_STRIDE_2; r.vstride = GEN_VERTICAL_STRIDE_16; return r; } INLINE bool is_signed_int(void) const { if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE) return true; return false; } INLINE bool isdf(void) const { if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE) return true; return false; } INLINE int flag_nr(void) const { return nr & 15; } INLINE int flag_subnr(void) const { return subnr / typeSize(type); } static INLINE GenRegister h2(GenRegister reg) { GenRegister r = reg; r.hstride = GEN_HORIZONTAL_STRIDE_2; return r; } static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) { GBE_ASSERT(reg.physical == 0); if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register return reg; else { reg.quarter = quarter; return reg; } } static INLINE GenRegister QnPhysical(GenRegister reg, uint32_t quarter) { GBE_ASSERT(reg.physical); if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register return reg; else { const uint32_t typeSz = typeSize(reg.type); const uint32_t horizontal = stride(reg.hstride); const uint32_t grfOffset = reg.nr*GEN_REG_SIZE + reg.subnr; const uint32_t nextOffset = grfOffset + 8*quarter*horizontal*typeSz; reg.nr = nextOffset / GEN_REG_SIZE; reg.subnr = (nextOffset % GEN_REG_SIZE); return reg; } } static INLINE GenRegister Qn(GenRegister reg, uint32_t quarter) { if (reg.physical) return QnPhysical(reg, quarter); else return QnVirtual(reg, quarter); } static INLINE GenRegister vec16(uint32_t file, ir::Register reg) { return GenRegister(file, reg, GEN_TYPE_F, 
GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec8(uint32_t file, ir::Register reg) { return GenRegister(file, reg, GEN_TYPE_F, GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec4(uint32_t file, ir::Register reg) { return GenRegister(file, reg, GEN_TYPE_F, GEN_VERTICAL_STRIDE_4, GEN_WIDTH_4, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec2(uint32_t file, ir::Register reg) { return GenRegister(file, reg, GEN_TYPE_F, GEN_VERTICAL_STRIDE_2, GEN_WIDTH_2, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec1(uint32_t file, ir::Register reg) { return GenRegister(file, reg, GEN_TYPE_F, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0); } static INLINE GenRegister retype(GenRegister reg, uint32_t type) { reg.type = type; return reg; } static INLINE GenRegister df16(uint32_t file, ir::Register reg) { return retype(vec16(file, reg), GEN_TYPE_DF); } static INLINE GenRegister df8(uint32_t file, ir::Register reg) { return retype(vec8(file, reg), GEN_TYPE_DF); } static INLINE GenRegister df1(uint32_t file, ir::Register reg) { return retype(vec1(file, reg), GEN_TYPE_DF); } static INLINE GenRegister ud16(uint32_t file, ir::Register reg) { return retype(vec16(file, reg), GEN_TYPE_UD); } static INLINE GenRegister ud8(uint32_t file, ir::Register reg) { return retype(vec8(file, reg), GEN_TYPE_UD); } static INLINE GenRegister ud1(uint32_t file, ir::Register reg) { return retype(vec1(file, reg), GEN_TYPE_UD); } static INLINE GenRegister d8(uint32_t file, ir::Register reg) { return retype(vec8(file, reg), GEN_TYPE_D); } static INLINE GenRegister uw16(uint32_t file, ir::Register reg) { return retype(vec16(file, reg), GEN_TYPE_UW); } static INLINE GenRegister uw8(uint32_t file, ir::Register reg) { return retype(vec8(file, reg), GEN_TYPE_UW); } static INLINE GenRegister uw1(uint32_t file, ir::Register reg) { return retype(vec1(file, reg), GEN_TYPE_UW); } static INLINE GenRegister 
ub16(uint32_t file, ir::Register reg) { return GenRegister(file, reg, GEN_TYPE_UB, GEN_VERTICAL_STRIDE_16, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2); } static INLINE GenRegister ub8(uint32_t file, ir::Register reg) { return GenRegister(file, reg, GEN_TYPE_UB, GEN_VERTICAL_STRIDE_16, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2); } static INLINE GenRegister ub1(uint32_t file, ir::Register reg) { return retype(vec1(file, reg), GEN_TYPE_UB); } static INLINE GenRegister unpacked_uw(ir::Register reg) { return GenRegister(GEN_GENERAL_REGISTER_FILE, reg, GEN_TYPE_UW, GEN_VERTICAL_STRIDE_16, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2); } static INLINE GenRegister unpacked_ub(ir::Register reg) { return GenRegister(GEN_GENERAL_REGISTER_FILE, reg, GEN_TYPE_UB, GEN_VERTICAL_STRIDE_32, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_4); } static INLINE GenRegister imm(uint32_t type) { return GenRegister(GEN_IMMEDIATE_VALUE, 0, 0, type, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0); } static INLINE GenRegister immint64(int64_t i) { GenRegister immediate = imm(GEN_TYPE_L); immediate.value.i64 = i; return immediate; } static INLINE GenRegister immdf(double df) { GenRegister immediate = imm(GEN_TYPE_DF); immediate.value.df = df; return immediate; } static INLINE GenRegister immf(float f) { GenRegister immediate = imm(GEN_TYPE_F); immediate.value.f = f; return immediate; } static INLINE GenRegister immd(int d) { GenRegister immediate = imm(GEN_TYPE_D); immediate.value.d = d; return immediate; } static INLINE GenRegister immud(uint32_t ud) { GenRegister immediate = imm(GEN_TYPE_UD); immediate.value.ud = ud; return immediate; } static INLINE GenRegister immuw(uint16_t uw) { GenRegister immediate = imm(GEN_TYPE_UW); immediate.value.ud = uw | (uw << 16); return immediate; } static INLINE GenRegister immw(int16_t w) { GenRegister immediate = imm(GEN_TYPE_W); immediate.value.d = w | (w << 16); return immediate; } static INLINE GenRegister immv(uint32_t v) { GenRegister immediate = imm(GEN_TYPE_V); 
immediate.vstride = GEN_VERTICAL_STRIDE_0; immediate.width = GEN_WIDTH_8; immediate.hstride = GEN_HORIZONTAL_STRIDE_1; immediate.value.ud = v; return immediate; } static INLINE GenRegister immvf(uint32_t v) { GenRegister immediate = imm(GEN_TYPE_VF); immediate.vstride = GEN_VERTICAL_STRIDE_0; immediate.width = GEN_WIDTH_4; immediate.hstride = GEN_HORIZONTAL_STRIDE_1; immediate.value.ud = v; return immediate; } static INLINE GenRegister immvf4(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) { GenRegister immediate = imm(GEN_TYPE_VF); immediate.vstride = GEN_VERTICAL_STRIDE_0; immediate.width = GEN_WIDTH_4; immediate.hstride = GEN_HORIZONTAL_STRIDE_1; immediate.value.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); return immediate; } static INLINE GenRegister f1grf(ir::Register reg) { return vec1(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister f2grf(ir::Register reg) { return vec2(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister f4grf(ir::Register reg) { return vec4(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister f8grf(ir::Register reg) { return vec8(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister f16grf(ir::Register reg) { return vec16(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister df1grf(ir::Register reg) { return df1(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister df8grf(ir::Register reg) { return df8(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister df16grf(ir::Register reg) { return df16(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister ud16grf(ir::Register reg) { return ud16(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister ud8grf(ir::Register reg) { return ud8(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister ud1grf(ir::Register reg) { return ud1(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister uw1grf(ir::Register reg) { return uw1(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister uw8grf(ir::Register reg) 
{ return uw8(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister uw16grf(ir::Register reg) { return uw16(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister ub1grf(ir::Register reg) { return ub1(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister ub8grf(ir::Register reg) { return ub8(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister ub16grf(ir::Register reg) { return ub16(GEN_GENERAL_REGISTER_FILE, reg); } static INLINE GenRegister null(void) { return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_NULL, 0, GEN_TYPE_F, GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1); } static INLINE bool isNull(GenRegister reg) { return (reg.file == GEN_ARCHITECTURE_REGISTER_FILE && reg.nr == GEN_ARF_NULL); } static INLINE GenRegister acc(void) { return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ACCUMULATOR, 0, GEN_TYPE_F, GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister ip(void) { return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_IP, 0, GEN_TYPE_D, GEN_VERTICAL_STRIDE_4, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0); } static INLINE GenRegister notification1(void) { return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_NOTIFICATION_COUNT, 0, GEN_TYPE_UD, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0); } static INLINE GenRegister flag(uint32_t nr, uint32_t subnr) { return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_FLAG | nr, subnr, GEN_TYPE_UW, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0); } static INLINE GenRegister next(GenRegister reg) { if (reg.physical) reg.nr++; else reg.quarter++; return reg; } /*! 
Build an indirectly addressed source */ static INLINE GenRegister indirect(uint32_t type, uint32_t subnr, uint32_t width) { GenRegister reg; reg.type = type; reg.file = GEN_GENERAL_REGISTER_FILE; reg.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER; reg.width = width; reg.subnr = subnr; reg.nr = 0; reg.negation = 0; reg.absolute = 0; reg.vstride = 0; reg.hstride = 0; return reg; } static INLINE GenRegister vec16(uint32_t file, uint32_t nr, uint32_t subnr) { return GenRegister(file, nr, subnr, GEN_TYPE_F, GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec8(uint32_t file, uint32_t nr, uint32_t subnr) { return GenRegister(file, nr, subnr, GEN_TYPE_F, GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec4(uint32_t file, uint32_t nr, uint32_t subnr) { return GenRegister(file, nr, subnr, GEN_TYPE_F, GEN_VERTICAL_STRIDE_4, GEN_WIDTH_4, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec2(uint32_t file, uint32_t nr, uint32_t subnr) { return GenRegister(file, nr, subnr, GEN_TYPE_F, GEN_VERTICAL_STRIDE_2, GEN_WIDTH_2, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister vec1(uint32_t file, uint32_t nr, uint32_t subnr) { return GenRegister(file, nr, subnr, GEN_TYPE_F, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0); } static INLINE int hstride_size(GenRegister reg) { switch (reg.hstride) { case GEN_HORIZONTAL_STRIDE_0: return 0; case GEN_HORIZONTAL_STRIDE_1: return 1; case GEN_HORIZONTAL_STRIDE_2: return 2; case GEN_HORIZONTAL_STRIDE_4: return 4; default: NOT_IMPLEMENTED; return 0; } } static INLINE GenRegister suboffset(GenRegister reg, uint32_t delta) { if (reg.hstride != GEN_HORIZONTAL_STRIDE_0) { reg.subnr += delta * typeSize(reg.type); reg.nr += reg.subnr / 32; reg.subnr %= 32; } return reg; } static INLINE GenRegister df16(uint32_t file, uint32_t nr, uint32_t subnr) { return retype(vec16(file, nr, subnr), GEN_TYPE_DF); } static INLINE GenRegister df8(uint32_t 
file, uint32_t nr, uint32_t subnr) { return retype(vec8(file, nr, subnr), GEN_TYPE_DF); } static INLINE GenRegister df1(uint32_t file, uint32_t nr, uint32_t subnr) { return retype(vec1(file, nr, subnr), GEN_TYPE_DF); } static INLINE GenRegister ud16(uint32_t file, uint32_t nr, uint32_t subnr) { return retype(vec16(file, nr, subnr), GEN_TYPE_UD); } static INLINE GenRegister ud8(uint32_t file, uint32_t nr, uint32_t subnr) { return retype(vec8(file, nr, subnr), GEN_TYPE_UD); } static INLINE GenRegister ud1(uint32_t file, uint32_t nr, uint32_t subnr) { return retype(vec1(file, nr, subnr), GEN_TYPE_UD); } static INLINE GenRegister d8(uint32_t file, uint32_t nr, uint32_t subnr) { return retype(vec8(file, nr, subnr), GEN_TYPE_D); } static INLINE GenRegister uw16(uint32_t file, uint32_t nr, uint32_t subnr) { return suboffset(retype(vec16(file, nr, 0), GEN_TYPE_UW), subnr); } static INLINE GenRegister uw8(uint32_t file, uint32_t nr, uint32_t subnr) { return suboffset(retype(vec8(file, nr, 0), GEN_TYPE_UW), subnr); } static INLINE GenRegister uw1(uint32_t file, uint32_t nr, uint32_t subnr) { return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UW), subnr); } static INLINE GenRegister ub16(uint32_t file, uint32_t nr, uint32_t subnr) { return GenRegister(file, nr, subnr, GEN_TYPE_UB, GEN_VERTICAL_STRIDE_16, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2); } static INLINE GenRegister ub8(uint32_t file, uint32_t nr, uint32_t subnr) { return GenRegister(file, nr, subnr, GEN_TYPE_UB, GEN_VERTICAL_STRIDE_16, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2); } static INLINE GenRegister ub1(uint32_t file, uint32_t nr, uint32_t subnr) { return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UB), subnr); } static INLINE GenRegister f1grf(uint32_t nr, uint32_t subnr) { return vec1(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister f2grf(uint32_t nr, uint32_t subnr) { return vec2(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister f4grf(uint32_t nr, uint32_t subnr) { return 
vec4(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister f8grf(uint32_t nr, uint32_t subnr) { return vec8(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister f16grf(uint32_t nr, uint32_t subnr) { return vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister df16grf(uint32_t nr, uint32_t subnr) { return df16(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister df8grf(uint32_t nr, uint32_t subnr) { return df8(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister df1grf(uint32_t nr, uint32_t subnr) { return df1(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister ud16grf(uint32_t nr, uint32_t subnr) { return ud16(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister ud8grf(uint32_t nr, uint32_t subnr) { return ud8(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister ud1grf(uint32_t nr, uint32_t subnr) { return ud1(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister ud1arf(uint32_t nr, uint32_t subnr) { return ud1(GEN_ARCHITECTURE_REGISTER_FILE, nr, subnr); } static INLINE GenRegister uw1grf(uint32_t nr, uint32_t subnr) { return uw1(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister uw8grf(uint32_t nr, uint32_t subnr) { return uw8(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister uw16grf(uint32_t nr, uint32_t subnr) { return uw16(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister ub1grf(uint32_t nr, uint32_t subnr) { return ub1(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister ub8grf(uint32_t nr, uint32_t subnr) { return ub8(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister ub16grf(uint32_t nr, uint32_t subnr) { return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr); } static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) { return GenRegister(GEN_GENERAL_REGISTER_FILE, nr, subnr, GEN_TYPE_UW, GEN_VERTICAL_STRIDE_16, GEN_WIDTH_8, 
GEN_HORIZONTAL_STRIDE_2); } static INLINE GenRegister packed_ud(uint32_t nr, uint32_t subnr) { return GenRegister(GEN_GENERAL_REGISTER_FILE, nr, subnr, GEN_TYPE_UD, GEN_VERTICAL_STRIDE_8, GEN_WIDTH_4, GEN_HORIZONTAL_STRIDE_1); } static INLINE GenRegister unpacked_ud(uint32_t nr, uint32_t subnr) { return GenRegister(GEN_GENERAL_REGISTER_FILE, nr, subnr, GEN_TYPE_UD, GEN_VERTICAL_STRIDE_8, GEN_WIDTH_4, GEN_HORIZONTAL_STRIDE_2); } static INLINE GenRegister mask(uint32_t subnr) { return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr); } static INLINE GenRegister addr1(uint32_t subnr) { return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ADDRESS, subnr); } static INLINE GenRegister addr8(uint32_t subnr) { return uw8(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ADDRESS, subnr); } static INLINE GenRegister negate(GenRegister reg) { if (reg.file != GEN_IMMEDIATE_VALUE) reg.negation ^= 1; else { if (reg.type == GEN_TYPE_F) reg.value.f = -reg.value.f; else if (reg.type == GEN_TYPE_UD) reg.value.ud = -reg.value.ud; else if (reg.type == GEN_TYPE_D) reg.value.d = -reg.value.d; else if (reg.type == GEN_TYPE_UW) { const uint16_t uw = reg.value.ud & 0xffff; reg = GenRegister::immuw(-uw); } else if (reg.type == GEN_TYPE_W) { const uint16_t uw = reg.value.ud & 0xffff; reg = GenRegister::immw(-(int16_t)uw); } else NOT_SUPPORTED; } return reg; } static INLINE GenRegister abs(GenRegister reg) { reg.absolute = 1; reg.negation = 0; return reg; } /*! Generate register encoding with run-time simdWidth */ #define DECL_REG_ENCODER(NAME, SIMD16, SIMD8, SIMD1) \ template \ static INLINE GenRegister NAME(uint32_t simdWidth, Args... 
values) { \ if (simdWidth == 16) \ return SIMD16(values...); \ else if (simdWidth == 8) \ return SIMD8(values...); \ else if (simdWidth == 1) \ return SIMD1(values...); \ else { \ NOT_IMPLEMENTED; \ return SIMD1(values...); \ } \ } DECL_REG_ENCODER(dfxgrf, df16grf, df8grf, df1grf); DECL_REG_ENCODER(fxgrf, f16grf, f8grf, f1grf); DECL_REG_ENCODER(uwxgrf, uw16grf, uw8grf, uw1grf); DECL_REG_ENCODER(udxgrf, ud16grf, ud8grf, ud1grf); #undef DECL_REG_ENCODER }; } /* namespace gbe */ #endif /* __GEN_REGISTER_HPP__ */ Release_v0.3/backend/src/backend/program.cpp000066400000000000000000000704451223142177000211350ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file callback interface for the compiler * \author Benjamin Segovia */ #include "program.h" #include "program.hpp" #include "gen_program.h" #include "sys/platform.hpp" #include "sys/cvar.hpp" #include "ir/liveness.hpp" #include "ir/value.hpp" #include "ir/unit.hpp" #include "llvm/llvm_to_gen.hpp" #include "llvm/Config/config.h" #include #include #include #include #include #include #include /* Not defined for LLVM 3.0 */ #if !defined(LLVM_VERSION_MAJOR) #define LLVM_VERSION_MAJOR 3 #endif /* !defined(LLVM_VERSION_MAJOR) */ /* Not defined for LLVM 3.0 */ #if !defined(LLVM_VERSION_MINOR) #define LLVM_VERSION_MINOR 0 #endif /* !defined(LLVM_VERSION_MINOR) */ #include #include #include #if LLVM_VERSION_MINOR <= 1 #include #else #include #endif /* LLVM_VERSION_MINOR <= 1 */ #include #include #include #include #include #if LLVM_VERSION_MINOR <= 2 #include #else #include #endif /* LLVM_VERSION_MINOR <= 2 */ #include #include #include "src/GBEConfig.h" namespace gbe { Kernel::Kernel(const std::string &name) : name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), slmSize(0), ctx(NULL), samplerSet(NULL), imageSet(NULL) {} Kernel::~Kernel(void) { if(ctx) GBE_DELETE(ctx); if(samplerSet) GBE_DELETE(samplerSet); if(imageSet) GBE_DELETE(imageSet); GBE_SAFE_DELETE_ARRAY(args); } int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const { const PatchInfo patch(type, subType); const auto it = std::lower_bound(patches.begin(), patches.end(), patch); if (it == patches.end()) return -1; // nothing found if (patch < *it) return -1; // they are not equal return it->offset; // we found it! 
} Program::Program(void) : constantSet(NULL) {} Program::~Program(void) { for (auto &kernel : kernels) GBE_DELETE(kernel.second); if (constantSet) delete constantSet; } BVAR(OCL_OUTPUT_GEN_IR, false); bool Program::buildFromLLVMFile(const char *fileName, std::string &error) { ir::Unit unit; if (llvmToGen(unit, fileName) == false) { error = std::string(fileName) + " not found"; return false; } this->buildFromUnit(unit, error); return true; } bool Program::buildFromUnit(const ir::Unit &unit, std::string &error) { constantSet = new ir::ConstantSet(unit.getConstantSet()); const auto &set = unit.getFunctionSet(); const uint32_t kernelNum = set.size(); if (OCL_OUTPUT_GEN_IR) std::cout << unit; if (kernelNum == 0) return true; for (const auto &pair : set) { const std::string &name = pair.first; Kernel *kernel = this->compileKernel(unit, name); kernel->setSamplerSet(pair.second->getSamplerSet()); kernel->setImageSet(pair.second->getImageSet()); kernels.insert(std::make_pair(name, kernel)); } return true; } #define OUT_UPDATE_SZ(elt) SERIALIZE_OUT(elt, outs, ret_size) #define IN_UPDATE_SZ(elt) DESERIALIZE_IN(elt, ins, total_size) size_t Program::serializeToBin(std::ostream& outs) { size_t ret_size = 0; size_t ker_num = kernels.size(); int has_constset = 0; OUT_UPDATE_SZ(magic_begin); if (constantSet) { has_constset = 1; OUT_UPDATE_SZ(has_constset); size_t sz = constantSet->serializeToBin(outs); if (!sz) return 0; ret_size += sz; } else { OUT_UPDATE_SZ(has_constset); } OUT_UPDATE_SZ(ker_num); for (auto ker : kernels) { size_t sz = ker.second->serializeToBin(outs); if (!sz) return 0; ret_size += sz; } OUT_UPDATE_SZ(magic_end); OUT_UPDATE_SZ(ret_size); return ret_size; } size_t Program::deserializeFromBin(std::istream& ins) { size_t total_size = 0; int has_constset = 0; size_t ker_num; uint32_t magic; IN_UPDATE_SZ(magic); if (magic != magic_begin) return 0; IN_UPDATE_SZ(has_constset); if(has_constset) { constantSet = new ir::ConstantSet; size_t sz = 
constantSet->deserializeFromBin(ins); if (sz == 0) { return 0; } total_size += sz; } IN_UPDATE_SZ(ker_num); for (size_t i = 0; i < ker_num; i++) { size_t ker_serial_sz; std::string ker_name; // Just a empty name here. Kernel* ker = allocateKernel(ker_name); if(!(ker_serial_sz = ker->deserializeFromBin(ins))) return 0; kernels.insert(std::make_pair(ker->getName(), ker)); total_size += ker_serial_sz; } IN_UPDATE_SZ(magic); if (magic != magic_end) return 0; size_t total_bytes; IN_UPDATE_SZ(total_bytes); if (total_bytes + sizeof(total_size) != total_size) return 0; return total_size; } size_t Kernel::serializeToBin(std::ostream& outs) { unsigned int i; size_t ret_size = 0; int has_samplerset = 0; int has_imageset = 0; OUT_UPDATE_SZ(magic_begin); OUT_UPDATE_SZ(name.size()); outs.write(name.c_str(), name.size()); ret_size += sizeof(char)*name.size(); OUT_UPDATE_SZ(argNum); for (i = 0; i < argNum; i++) { KernelArgument& arg = args[i]; OUT_UPDATE_SZ(arg.type); OUT_UPDATE_SZ(arg.size); OUT_UPDATE_SZ(arg.bufSize); } OUT_UPDATE_SZ(patches.size()); for (auto patch : patches) { unsigned int tmp; tmp = patch.type; OUT_UPDATE_SZ(tmp); tmp = patch.subType; OUT_UPDATE_SZ(tmp); tmp = patch.offset; OUT_UPDATE_SZ(tmp); } OUT_UPDATE_SZ(curbeSize); OUT_UPDATE_SZ(simdWidth); OUT_UPDATE_SZ(stackSize); OUT_UPDATE_SZ(scratchSize); OUT_UPDATE_SZ(useSLM); OUT_UPDATE_SZ(slmSize); /* samplers. */ if (samplerSet) { has_samplerset = 1; OUT_UPDATE_SZ(has_samplerset); size_t sz = samplerSet->serializeToBin(outs); if (!sz) return 0; ret_size += sz; } else { OUT_UPDATE_SZ(has_samplerset); } /* images. */ if (imageSet) { has_imageset = 1; OUT_UPDATE_SZ(has_imageset); size_t sz = imageSet->serializeToBin(outs); if (!sz) return 0; ret_size += sz; } else { OUT_UPDATE_SZ(has_imageset); } /* Code. 
*/ const char * code = getCode(); OUT_UPDATE_SZ(getCodeSize()); outs.write(code, getCodeSize()*sizeof(char)); ret_size += getCodeSize()*sizeof(char); OUT_UPDATE_SZ(magic_end); OUT_UPDATE_SZ(ret_size); return ret_size; } size_t Kernel::deserializeFromBin(std::istream& ins) { size_t total_size = 0; int has_samplerset = 0; int has_imageset = 0; size_t code_size = 0; uint32_t magic = 0; size_t patch_num = 0; IN_UPDATE_SZ(magic); if (magic != magic_begin) return 0; size_t name_len; IN_UPDATE_SZ(name_len); char* c_name = new char[name_len+1]; ins.read(c_name, name_len*sizeof(char)); total_size += sizeof(char)*name_len; c_name[name_len] = 0; name = c_name; delete[] c_name; IN_UPDATE_SZ(argNum); args = GBE_NEW_ARRAY_NO_ARG(KernelArgument, argNum); for (uint32_t i = 0; i < argNum; i++) { KernelArgument& arg = args[i]; IN_UPDATE_SZ(arg.type); IN_UPDATE_SZ(arg.size); IN_UPDATE_SZ(arg.bufSize); } IN_UPDATE_SZ(patch_num); for (uint32_t i = 0; i < patch_num; i++) { unsigned int tmp; PatchInfo patch; IN_UPDATE_SZ(tmp); patch.type = tmp; IN_UPDATE_SZ(tmp); patch.subType = tmp; IN_UPDATE_SZ(tmp); patch.offset = tmp; patches.push_back(patch); } IN_UPDATE_SZ(curbeSize); IN_UPDATE_SZ(simdWidth); IN_UPDATE_SZ(stackSize); IN_UPDATE_SZ(scratchSize); IN_UPDATE_SZ(useSLM); IN_UPDATE_SZ(slmSize); IN_UPDATE_SZ(has_samplerset); if (has_samplerset) { samplerSet = GBE_NEW(ir::SamplerSet); size_t sz = samplerSet->deserializeFromBin(ins); if (sz == 0) { return 0; } total_size += sz; } IN_UPDATE_SZ(has_imageset); if (has_imageset) { imageSet = GBE_NEW(ir::ImageSet); size_t sz = imageSet->deserializeFromBin(ins); if (sz == 0) { return 0; } total_size += sz; } IN_UPDATE_SZ(code_size); if (code_size) { char* code = GBE_NEW_ARRAY_NO_ARG(char, code_size); ins.read(code, code_size*sizeof(char)); total_size += sizeof(char)*code_size; setCode(code, code_size); } IN_UPDATE_SZ(magic); if (magic != magic_end) return 0; size_t total_bytes; IN_UPDATE_SZ(total_bytes); if (total_bytes + sizeof(total_size) != 
total_size) return 0; return total_size; } #undef OUT_UPDATE_SZ #undef IN_UPDATE_SZ void Program::printStatus(int indent, std::ostream& outs) { using namespace std; string spaces = indent_to_str(indent); outs << spaces << "=============== Begin Program ===============" << "\n"; if (constantSet) { constantSet->printStatus(indent + 4, outs); } for (auto ker : kernels) { ker.second->printStatus(indent + 4, outs); } outs << spaces << "================ End Program ================" << "\n"; } void Kernel::printStatus(int indent, std::ostream& outs) { using namespace std; string spaces = indent_to_str(indent); string spaces_nl = indent_to_str(indent + 4); int num; outs << spaces << "+++++++++++ Begin Kernel +++++++++++" << "\n"; outs << spaces_nl << "Kernel Name: " << name << "\n"; outs << spaces_nl << " curbeSize: " << curbeSize << "\n"; outs << spaces_nl << " simdWidth: " << simdWidth << "\n"; outs << spaces_nl << " stackSize: " << stackSize << "\n"; outs << spaces_nl << " scratchSize: " << scratchSize << "\n"; outs << spaces_nl << " useSLM: " << useSLM << "\n"; outs << spaces_nl << " slmSize: " << slmSize << "\n"; outs << spaces_nl << " Argument Number is " << argNum << "\n"; for (uint32_t i = 0; i < argNum; i++) { KernelArgument& arg = args[i]; outs << spaces_nl << " Arg " << i << ":\n"; outs << spaces_nl << " type value: "<< arg.type << "\n"; outs << spaces_nl << " size: "<< arg.size << "\n"; outs << spaces_nl << " bufSize: "<< arg.bufSize << "\n"; } outs << spaces_nl << " Patches Number is " << patches.size() << "\n"; num = 0; for (auto patch : patches) { num++; outs << spaces_nl << " patch " << num << ":\n"; outs << spaces_nl << " type value: "<< patch.type << "\n"; outs << spaces_nl << " subtype value: "<< patch.subType << "\n"; outs << spaces_nl << " offset: "<< patch.offset << "\n"; } if (samplerSet) { samplerSet->printStatus(indent + 4, outs); } if (imageSet) { imageSet->printStatus(indent + 4, outs); } outs << spaces << "++++++++++++ End Kernel ++++++++++++" 
<< "\n"; } /*********************** End of Program class member function *************************/ static void programDelete(gbe_program gbeProgram) { gbe::Program *program = (gbe::Program*)(gbeProgram); GBE_SAFE_DELETE(program); } static void buildModuleFromSource(const char* input, const char* output, std::string options) { // Arguments to pass to the clang frontend vector args; bool bOpt = true; bool bFastMath = false; vector useless; //hold substrings to avoid c_str free size_t start = 0, end = 0; /* clang unsupport options: -cl-denorms-are-zero, -cl-strict-aliasing -cl-no-signed-zeros, -cl-fp32-correctly-rounded-divide-sqrt all support options, refer to clang/include/clang/Driver/Options.inc Maybe can filter these options to avoid warning */ while (end != std::string::npos) { end = options.find(' ', start); std::string str = options.substr(start, end - start); start = end + 1; if(str.size() == 0) continue; if(str == "-cl-opt-disable") bOpt = false; if(str == "-cl-fast-relaxed-math") bFastMath = true; useless.push_back(str); args.push_back(str.c_str()); } args.push_back("-mllvm"); args.push_back("-inline-threshold=200000"); #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND args.push_back("-DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND"); #endif args.push_back("-emit-llvm"); // XXX we haven't implement those builtin functions, // so disable it currently. 
args.push_back("-fno-builtin"); if(bOpt) args.push_back("-O2"); if(bFastMath) args.push_back("-D __FAST_RELAXED_MATH__=1"); #if LLVM_VERSION_MINOR <= 2 args.push_back("-triple"); args.push_back("nvptx"); #else args.push_back("-x"); args.push_back("cl"); args.push_back("-triple"); args.push_back("spir"); #endif /* LLVM_VERSION_MINOR <= 2 */ args.push_back(input); // The compiler invocation needs a DiagnosticsEngine so it can report problems #if LLVM_VERSION_MINOR <= 1 args.push_back("-triple"); args.push_back("ptx32"); clang::TextDiagnosticPrinter *DiagClient = new clang::TextDiagnosticPrinter(llvm::errs(), clang::DiagnosticOptions()); llvm::IntrusiveRefCntPtr DiagID(new clang::DiagnosticIDs()); clang::DiagnosticsEngine Diags(DiagID, DiagClient); #else args.push_back("-ffp-contract=off"); llvm::IntrusiveRefCntPtr DiagOpts = new clang::DiagnosticOptions(); clang::TextDiagnosticPrinter *DiagClient = new clang::TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); llvm::IntrusiveRefCntPtr DiagID(new clang::DiagnosticIDs()); clang::DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); #endif /* LLVM_VERSION_MINOR <= 1 */ // Create the compiler invocation llvm::OwningPtr CI(new clang::CompilerInvocation); clang::CompilerInvocation::CreateFromArgs(*CI, &args[0], &args[0] + args.size(), Diags); // Create the compiler instance clang::CompilerInstance Clang; Clang.setInvocation(CI.take()); // Get ready to report problems #if LLVM_VERSION_MINOR <= 2 Clang.createDiagnostics(args.size(), &args[0]); #else Clang.createDiagnostics(); #endif /* LLVM_VERSION_MINOR <= 2 */ if (!Clang.hasDiagnostics()) return; // Set Language clang::LangOptions & lang_opts = Clang.getLangOpts(); lang_opts.OpenCL = 1; //llvm flags need command line parsing to take effect if (!Clang.getFrontendOpts().LLVMArgs.empty()) { unsigned NumArgs = Clang.getFrontendOpts().LLVMArgs.size(); const char **Args = new const char*[NumArgs + 2]; Args[0] = "clang (LLVM option parsing)"; for (unsigned i = 0; i != NumArgs; 
++i){ Args[i + 1] = Clang.getFrontendOpts().LLVMArgs[i].c_str(); } Args[NumArgs + 1] = 0; llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args); delete [] Args; } // Create an action and make the compiler instance carry it out llvm::OwningPtr Act(new clang::EmitLLVMOnlyAction()); sem_wait(&llvm_semaphore); auto retVal = Clang.ExecuteAction(*Act); sem_post(&llvm_semaphore); if (!retVal) return; llvm::Module *module = Act->takeModule(); std::string ErrorInfo; #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR > 3) auto mode = llvm::sys::fs::F_Binary; #else auto mode = llvm::raw_fd_ostream::F_Binary; #endif llvm::raw_fd_ostream OS(output, ErrorInfo, mode); //still write to temp file for code simply, otherwise need add another function. //because gbe_program_new_from_llvm also be used by cl_program_create_from_llvm, can't be removed //TODO: Pass module to llvmToGen, if use module, should return Act and use OwningPtr out of this funciton llvm::WriteBitcodeToFile(module, OS); OS.close(); } extern std::string ocl_stdlib_str; BVAR(OCL_USE_PCH, true); static gbe_program programNewFromSource(const char *source, size_t stringSize, const char *options, char *err, size_t *errSize) { char clStr[L_tmpnam+1], llStr[L_tmpnam+1]; const std::string clName = std::string(tmpnam_r(clStr)) + ".cl"; /* unsafe! */ const std::string llName = std::string(tmpnam_r(llStr)) + ".ll"; /* unsafe! */ std::string pchHeaderName; std::string clOpt; FILE *clFile = fopen(clName.c_str(), "w"); FATAL_IF(clFile == NULL, "Failed to open temporary file"); bool usePCH = false; if(options) clOpt += options; if (options || !OCL_USE_PCH) { /* Some building option may cause the prebuild pch header file not compatible with the XXX.cl source. 
We need rebuild all here.*/ usePCH = false; } else { std::string dirs = PCH_OBJECT_DIR; std::istringstream idirs(dirs); while (getline(idirs, pchHeaderName, ';')) { if(access(pchHeaderName.c_str(), R_OK) == 0) { usePCH = true; break; } } } if (usePCH) { clOpt += " -include-pch "; clOpt += pchHeaderName; clOpt += " "; } else fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile); // Write the source to the cl file fwrite(source, strlen(source), 1, clFile); fclose(clFile); buildModuleFromSource(clName.c_str(), llName.c_str(), clOpt.c_str()); remove(clName.c_str()); // Now build the program from llvm gbe_program p = gbe_program_new_from_llvm(llName.c_str(), stringSize, err, errSize); remove(llName.c_str()); return p; } static size_t programGetGlobalConstantSize(gbe_program gbeProgram) { if (gbeProgram == NULL) return 0; const gbe::Program *program = (const gbe::Program*) gbeProgram; return program->getGlobalConstantSize(); } static void programGetGlobalConstantData(gbe_program gbeProgram, char *mem) { if (gbeProgram == NULL) return; const gbe::Program *program = (const gbe::Program*) gbeProgram; program->getGlobalConstantData(mem); } static uint32_t programGetKernelNum(gbe_program gbeProgram) { if (gbeProgram == NULL) return 0; const gbe::Program *program = (const gbe::Program*) gbeProgram; return program->getKernelNum(); } static gbe_kernel programGetKernelByName(gbe_program gbeProgram, const char *name) { if (gbeProgram == NULL) return NULL; const gbe::Program *program = (gbe::Program*) gbeProgram; return (gbe_kernel) program->getKernel(std::string(name)); } static gbe_kernel programGetKernel(const gbe_program gbeProgram, uint32_t ID) { if (gbeProgram == NULL) return NULL; const gbe::Program *program = (gbe::Program*) gbeProgram; return (gbe_kernel) program->getKernel(ID); } static const char *kernelGetName(gbe_kernel genKernel) { if (genKernel == NULL) return NULL; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return 
kernel->getName(); } static const char *kernelGetCode(gbe_kernel genKernel) { if (genKernel == NULL) return NULL; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getCode(); } static size_t kernelGetCodeSize(gbe_kernel genKernel) { if (genKernel == NULL) return 0u; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getCodeSize(); } static uint32_t kernelGetArgNum(gbe_kernel genKernel) { if (genKernel == NULL) return 0u; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getArgNum(); } static uint32_t kernelGetArgSize(gbe_kernel genKernel, uint32_t argID) { if (genKernel == NULL) return 0u; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getArgSize(argID); } static gbe_arg_type kernelGetArgType(gbe_kernel genKernel, uint32_t argID) { if (genKernel == NULL) return GBE_ARG_INVALID; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getArgType(argID); } static uint32_t kernelGetSIMDWidth(gbe_kernel genKernel) { if (genKernel == NULL) return GBE_ARG_INVALID; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getSIMDWidth(); } static int32_t kernelGetCurbeOffset(gbe_kernel genKernel, gbe_curbe_type type, uint32_t subType) { if (genKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getCurbeOffset(type, subType); } static int32_t kernelGetCurbeSize(gbe_kernel genKernel) { if (genKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getCurbeSize(); } static int32_t kernelGetStackSize(gbe_kernel genKernel) { if (genKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getStackSize(); } static int32_t kernelGetScratchSize(gbe_kernel genKernel) { if (genKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getScratchSize(); } static int32_t 
kernelUseSLM(gbe_kernel genKernel) { if (genKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getUseSLM() ? 1 : 0; } static int32_t kernelGetSLMSize(gbe_kernel genKernel) { if (genKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel; return kernel->getSLMSize(); } static int32_t kernelSetConstBufSize(gbe_kernel genKernel, uint32_t argID, size_t sz) { if (genKernel == NULL) return -1; gbe::Kernel *kernel = (gbe::Kernel*) genKernel; return kernel->setConstBufSize(argID, sz); } static size_t kernelGetSamplerSize(gbe_kernel gbeKernel) { if (gbeKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel; return kernel->getSamplerSize(); } static void kernelGetSamplerData(gbe_kernel gbeKernel, uint32_t *samplers) { if (gbeKernel == NULL) return; const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel; kernel->getSamplerData(samplers); } static size_t kernelGetImageSize(gbe_kernel gbeKernel) { if (gbeKernel == NULL) return 0; const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel; return kernel->getImageSize(); } static void kernelGetImageData(gbe_kernel gbeKernel, ImageInfo *images) { if (gbeKernel == NULL) return; const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel; kernel->getImageData(images); } static uint32_t gbeImageBaseIndex = 0; static void setImageBaseIndex(uint32_t baseIdx) { gbeImageBaseIndex = baseIdx; } static uint32_t getImageBaseIndex() { return gbeImageBaseIndex; } static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) { return 0u; } } /* namespace gbe */ GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb *gbe_program_new_from_source = NULL; GBE_EXPORT_SYMBOL gbe_program_new_from_binary_cb *gbe_program_new_from_binary = NULL; GBE_EXPORT_SYMBOL gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm = NULL; GBE_EXPORT_SYMBOL gbe_program_get_global_constant_size_cb *gbe_program_get_global_constant_size = NULL; 
GBE_EXPORT_SYMBOL gbe_program_get_global_constant_data_cb *gbe_program_get_global_constant_data = NULL; GBE_EXPORT_SYMBOL gbe_program_delete_cb *gbe_program_delete = NULL; GBE_EXPORT_SYMBOL gbe_program_get_kernel_num_cb *gbe_program_get_kernel_num = NULL; GBE_EXPORT_SYMBOL gbe_program_get_kernel_by_name_cb *gbe_program_get_kernel_by_name = NULL; GBE_EXPORT_SYMBOL gbe_program_get_kernel_cb *gbe_program_get_kernel = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_name_cb *gbe_kernel_get_name = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_code_cb *gbe_kernel_get_code = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_scratch_size_cb *gbe_kernel_get_scratch_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_slm_size_cb *gbe_kernel_get_slm_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size = NULL; GBE_EXPORT_SYMBOL gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data = NULL; GBE_EXPORT_SYMBOL 
gbe_set_image_base_index_cb *gbe_set_image_base_index = NULL; GBE_EXPORT_SYMBOL gbe_get_image_base_index_cb *gbe_get_image_base_index = NULL; namespace gbe { /* Use pre-main to setup the call backs */ struct CallBackInitializer { CallBackInitializer(void) { gbe_program_new_from_source = gbe::programNewFromSource; gbe_program_get_global_constant_size = gbe::programGetGlobalConstantSize; gbe_program_get_global_constant_data = gbe::programGetGlobalConstantData; gbe_program_delete = gbe::programDelete; gbe_program_get_kernel_num = gbe::programGetKernelNum; gbe_program_get_kernel_by_name = gbe::programGetKernelByName; gbe_program_get_kernel = gbe::programGetKernel; gbe_kernel_get_name = gbe::kernelGetName; gbe_kernel_get_code = gbe::kernelGetCode; gbe_kernel_get_code_size = gbe::kernelGetCodeSize; gbe_kernel_get_arg_num = gbe::kernelGetArgNum; gbe_kernel_get_arg_size = gbe::kernelGetArgSize; gbe_kernel_get_arg_type = gbe::kernelGetArgType; gbe_kernel_get_simd_width = gbe::kernelGetSIMDWidth; gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset; gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize; gbe_kernel_get_stack_size = gbe::kernelGetStackSize; gbe_kernel_get_scratch_size = gbe::kernelGetScratchSize; gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize; gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize; gbe_kernel_use_slm = gbe::kernelUseSLM; gbe_kernel_get_slm_size = gbe::kernelGetSLMSize; gbe_kernel_get_sampler_size = gbe::kernelGetSamplerSize; gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData; gbe_kernel_get_image_size = gbe::kernelGetImageSize; gbe_kernel_get_image_data = gbe::kernelGetImageData; gbe_get_image_base_index = gbe::getImageBaseIndex; gbe_set_image_base_index = gbe::setImageBaseIndex; genSetupCallBacks(); genSetupLLVMSemaphore(); } }; static CallBackInitializer cbInitializer; } /* namespace gbe */ 
Release_v0.3/backend/src/backend/program.h000066400000000000000000000214631223142177000205760ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file program.h * \author Benjamin Segovia * * C interface for the Gen kernels and programs (either real Gen ISA or Gen * simulator). This is the only thing the run-time can see from the compiler */
#ifndef __GBE_PROGRAM_H__
#define __GBE_PROGRAM_H__

#include
#include

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/*! Opaque structure that interfaces a GBE program */
typedef struct _gbe_program *gbe_program;

/*! Opaque structure that interfaces a GBE kernel (ie one OCL function) */
typedef struct _gbe_kernel *gbe_kernel;

/*! Argument type for each function call */
enum gbe_arg_type {
  GBE_ARG_VALUE = 0,            // int, float and so on
  GBE_ARG_GLOBAL_PTR = 1,       // __global
  GBE_ARG_CONSTANT_PTR = 2,     // __constant
  GBE_ARG_LOCAL_PTR = 3,        // __local
  GBE_ARG_IMAGE = 4,            // image2d_t, image3d_t
  GBE_ARG_SAMPLER = 5,          // sampler_t
  GBE_ARG_INVALID = 0xffffffff
};

/*! Constant buffer values (ie values to setup in the constant buffer) */
enum gbe_curbe_type {
  GBE_CURBE_LOCAL_ID_X = 0,
  GBE_CURBE_LOCAL_ID_Y,
  GBE_CURBE_LOCAL_ID_Z,
  GBE_CURBE_LOCAL_SIZE_X,
  GBE_CURBE_LOCAL_SIZE_Y,
  GBE_CURBE_LOCAL_SIZE_Z,
  GBE_CURBE_GLOBAL_SIZE_X,
  GBE_CURBE_GLOBAL_SIZE_Y,
  GBE_CURBE_GLOBAL_SIZE_Z,
  GBE_CURBE_GLOBAL_OFFSET_X,
  GBE_CURBE_GLOBAL_OFFSET_Y,
  GBE_CURBE_GLOBAL_OFFSET_Z,
  GBE_CURBE_GROUP_NUM_X,
  GBE_CURBE_GROUP_NUM_Y,
  GBE_CURBE_GROUP_NUM_Z,
  GBE_CURBE_WORK_DIM,
  GBE_CURBE_SAMPLER_INFO,
  GBE_CURBE_IMAGE_INFO,
  GBE_CURBE_STACK_POINTER,
  GBE_CURBE_KERNEL_ARGUMENT,
  GBE_CURBE_EXTRA_ARGUMENT,
  GBE_CURBE_BLOCK_IP,
  GBE_CURBE_THREAD_NUM
};

/*! Extra arguments use the negative range of sub-values */
enum gbe_extra_argument {
  GBE_STACK_BUFFER = 0,   /* Give stack location in curbe */
  GBE_CONSTANT_BUFFER = 1 /* constant buffer argument location in curbe */
};

/*! One record per image, filled in by gbe_kernel_get_image_data.
 *  NOTE(review): the *Slot fields presumably locate each image property in
 *  the curbe - confirm against the code that fills them. */
typedef struct ImageInfo {
  int32_t arg_idx;
  int32_t idx;
  int32_t wSlot;
  int32_t hSlot;
  int32_t depthSlot;
  int32_t dataTypeSlot;
  int32_t channelOrderSlot;
  int32_t dimOrderSlot;
} ImageInfo;

/*! Set the image base index */
typedef void (gbe_set_image_base_index_cb)(uint32_t base_idx);
extern gbe_set_image_base_index_cb *gbe_set_image_base_index;

/*! Get the image base index */
typedef uint32_t (gbe_get_image_base_index_cb)();
extern gbe_get_image_base_index_cb *gbe_get_image_base_index;

/*! Get the size of defined images */
typedef size_t (gbe_kernel_get_image_size_cb)(gbe_kernel gbeKernel);
extern gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size;

/*! Get the content of defined images */
typedef void (gbe_kernel_get_image_data_cb)(gbe_kernel gbeKernel, ImageInfo *images);
extern gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data;

/*! Create a new program from the given source code (zero terminated string) */
typedef gbe_program (gbe_program_new_from_source_cb)(const char *source,
                                                     size_t stringSize,
                                                     const char *options,
                                                     char *err,
                                                     size_t *err_size);
extern gbe_program_new_from_source_cb *gbe_program_new_from_source;

/*! Create a new program from the given blob */
typedef gbe_program (gbe_program_new_from_binary_cb)(const char *binary, size_t size);
extern gbe_program_new_from_binary_cb *gbe_program_new_from_binary;

/*! Create a new program from the given LLVM file */
typedef gbe_program (gbe_program_new_from_llvm_cb)(const char *fileName,
                                                   size_t string_size,
                                                   char *err,
                                                   size_t *err_size);
extern gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm;

/*! Get the size of global constants */
typedef size_t (gbe_program_get_global_constant_size_cb)(gbe_program gbeProgram);
extern gbe_program_get_global_constant_size_cb *gbe_program_get_global_constant_size;

/*! Get the content of global constants */
typedef void (gbe_program_get_global_constant_data_cb)(gbe_program gbeProgram, char *mem);
extern gbe_program_get_global_constant_data_cb *gbe_program_get_global_constant_data;

/*! Get the size of defined samplers */
typedef size_t (gbe_kernel_get_sampler_size_cb)(gbe_kernel gbeKernel);
extern gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size;

/*! Get the content of defined samplers */
typedef void (gbe_kernel_get_sampler_data_cb)(gbe_kernel gbeKernel, uint32_t *samplers);
extern gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data;

/*! Destroy and deallocate the given program */
typedef void (gbe_program_delete_cb)(gbe_program);
extern gbe_program_delete_cb *gbe_program_delete;

/*! Get the number of functions in the program */
typedef uint32_t (gbe_program_get_kernel_num_cb)(gbe_program);
extern gbe_program_get_kernel_num_cb *gbe_program_get_kernel_num;

/*! Get the kernel from its name */
typedef gbe_kernel (gbe_program_get_kernel_by_name_cb)(gbe_program, const char *name);
extern gbe_program_get_kernel_by_name_cb *gbe_program_get_kernel_by_name;

/*! Get the kernel from its ID */
typedef gbe_kernel (gbe_program_get_kernel_cb)(gbe_program, uint32_t ID);
extern gbe_program_get_kernel_cb *gbe_program_get_kernel;

/*! Get the kernel name */
typedef const char *(gbe_kernel_get_name_cb)(gbe_kernel);
extern gbe_kernel_get_name_cb *gbe_kernel_get_name;

/*! Get the kernel code, i.e. its instruction stream (not the OpenCL source) */
typedef const char *(gbe_kernel_get_code_cb)(gbe_kernel);
extern gbe_kernel_get_code_cb *gbe_kernel_get_code;

/*! Get the size of the kernel code (instruction stream) */
typedef size_t (gbe_kernel_get_code_size_cb)(gbe_kernel);
extern gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size;

/*! Get the total number of arguments */
typedef uint32_t (gbe_kernel_get_arg_num_cb)(gbe_kernel);
extern gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num;

/*! Get the size of the given argument */
typedef uint32_t (gbe_kernel_get_arg_size_cb)(gbe_kernel, uint32_t argID);
extern gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size;

/*! Get the type of the given argument */
typedef enum gbe_arg_type (gbe_kernel_get_arg_type_cb)(gbe_kernel, uint32_t argID);
extern gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type;

/*! Get the simd width for the kernel */
typedef uint32_t (gbe_kernel_get_simd_width_cb)(gbe_kernel);
extern gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width;

/*! Get the curbe size required by the kernel */
typedef int32_t (gbe_kernel_get_curbe_size_cb)(gbe_kernel);
extern gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size;

/*! Get the stack size (zero if no stack is required) */
typedef int32_t (gbe_kernel_get_stack_size_cb)(gbe_kernel);
extern gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size;

/*! Get the scratch size (zero if no scratch is required) */
typedef int32_t (gbe_kernel_get_scratch_size_cb)(gbe_kernel);
extern gbe_kernel_get_scratch_size_cb *gbe_kernel_get_scratch_size;

/*! Get the curbe offset where to put the data. Returns -1 if not required */
typedef int32_t (gbe_kernel_get_curbe_offset_cb)(gbe_kernel, enum gbe_curbe_type type, uint32_t sub_type);
extern gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset;

/*! Set the constant pointer arg size and return the cb offset in curbe */
typedef int32_t (gbe_kernel_set_const_buffer_size_cb)(gbe_kernel, uint32_t argID, size_t sz);
extern gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size;

/*! Indicates if a work group size is required. Return the required width or 0
 *  if none */
typedef uint32_t (gbe_kernel_get_required_work_group_size_cb)(gbe_kernel, uint32_t dim);
extern gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size;

/*! Says if SLM is used. Required to reconfigure the L3 complex */
typedef int32_t (gbe_kernel_use_slm_cb)(gbe_kernel);
extern gbe_kernel_use_slm_cb *gbe_kernel_use_slm;

/*! Get slm size needed for kernel local variables */
typedef int32_t (gbe_kernel_get_slm_size_cb)(gbe_kernel);
extern gbe_kernel_get_slm_size_cb *gbe_kernel_get_slm_size;

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* __GBE_PROGRAM_H__ */
Release_v0.3/backend/src/backend/program.hpp000066400000000000000000000231011223142177000211250ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file program.hpp * \author Benjamin Segovia */
#ifndef __GBE_PROGRAM_HPP__
#define __GBE_PROGRAM_HPP__

#include "backend/program.h"
#include "backend/context.hpp"
#include "ir/constant.hpp"
#include "ir/unit.hpp"
#include "ir/function.hpp"
#include "ir/sampler.hpp"
#include "sys/hash_map.hpp"
#include "sys/vector.hpp"
#include

namespace gbe {
namespace ir {
  class Unit; // Compilation unit. Contains the program to compile
} /* namespace ir */
} /* namespace gbe */

namespace gbe {

  /*! Info for the kernel argument */
  struct KernelArgument {
    gbe_arg_type type; //!< Pointer, structure, image, regular value?
    uint32_t size;     //!< Size of the argument
    uint32_t bufSize;  //!< Constant buffer size
  };

  /*! Stores the offset where to patch. The three bit-fields add up to 64 bits
   *  (16 + 32 + 16). */
  struct PatchInfo {
    INLINE PatchInfo(gbe_curbe_type type, uint32_t subType = 0u, uint32_t offset = 0u) :
      type(uint32_t(type)), subType(subType), offset(offset) {}
    /*! Leaves every field uninitialized */
    INLINE PatchInfo(void) {}
    uint64_t type : 16;    //!< Type of the patch (see program.h for the list)
    uint64_t subType : 32; //!< Optional sub-type of the patch (see program.h)
    uint64_t offset : 16;  //!< Optional offset to encode
  };

  /*! We will sort PatchInfo to make binary search. The order is by type, then
   *  by subType; offset takes no part in the comparison. */
  INLINE bool operator< (PatchInfo i0, PatchInfo i1) {
    if (i0.type != i1.type) return i0.type < i1.type;
    return i0.subType < i1.subType;
  }

  /*! Describe a compiled kernel */
  class Kernel : public NonCopyable, public Serializable
  {
  public:
    /*! Create an empty kernel with the given name */
    Kernel(const std::string &name);
    /*! Destroy it */
    virtual ~Kernel(void);
    /*! Return the instruction stream (to be implemented) */
    virtual const char *getCode(void) const = 0;
    /*! Set the instruction stream.*/
    virtual const void setCode(const char *, size_t size) = 0;
    /*! Return the instruction stream size (to be implemented) */
    virtual size_t getCodeSize(void) const = 0;
    /*! Get the kernel name */
    INLINE const char *getName(void) const { return name.c_str(); }
    /*! Return the number of arguments for the kernel call */
    INLINE uint32_t getArgNum(void) const { return argNum; }
    /*! Return the size of the given argument (0 if argID is out of range) */
    INLINE uint32_t getArgSize(uint32_t argID) const {
      return argID >= argNum ? 0u : args[argID].size;
    }
    /*! Return the type of the given argument (GBE_ARG_INVALID if argID is out
     *  of range) */
    INLINE gbe_arg_type getArgType(uint32_t argID) const {
      return argID >= argNum ? GBE_ARG_INVALID : args[argID].type;
    }
    /*! Get the offset where to patch. Returns -1 if no patch needed */
    int32_t getCurbeOffset(gbe_curbe_type type, uint32_t subType) const;
    /*! Get the curbe size required by the kernel */
    INLINE uint32_t getCurbeSize(void) const { return this->curbeSize; }
    /*! Return the size of the stack (zero if none) */
    INLINE uint32_t getStackSize(void) const { return this->stackSize; }
    /*! Return the size of the scratch memory needed (zero if none) */
    INLINE uint32_t getScratchSize(void) const { return this->scratchSize; }
    /*! Get the SIMD width for the kernel */
    INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
    /*! Says if SLM is needed for it */
    INLINE bool getUseSLM(void) const { return this->useSLM; }
    /*! Get the SLM size for kernel local variables */
    INLINE uint32_t getSLMSize(void) const { return this->slmSize; }
    /*! Set the constant buffer size of an argument and return the value of
     *  ctx->allocConstBuf(argID). Returns -1 without calling the context when
     *  argID is out of range, when the argument is not a GBE_ARG_CONSTANT_PTR,
     *  or when the stored size already equals sz. */
    int32_t setConstBufSize(uint32_t argID, size_t sz) {
      if(argID >= argNum) return -1;
      if(args[argID].type != GBE_ARG_CONSTANT_PTR) return -1;
      if(args[argID].bufSize != sz) {
        args[argID].bufSize = sz; // size_t stored into a uint32_t field
        return ctx->allocConstBuf(argID);
      }
      return -1;
    }
    /*! Set sampler set. */
    void setSamplerSet(ir::SamplerSet *from) {
      samplerSet = from;
    }
    /*! Get defined sampler size (samplerSet is dereferenced unchecked) */
    size_t getSamplerSize(void) const { return samplerSet->getDataSize(); }
    /*! Get defined sampler value array */
    void getSamplerData(uint32_t *samplers) const { samplerSet->getData(samplers); }
    /*! Set image set. */
    void setImageSet(ir::ImageSet * from) {
      imageSet = from;
    }
    /*! Get defined image size (imageSet is dereferenced unchecked) */
    size_t getImageSize(void) const { return imageSet->getDataSize(); }
    /*! Get defined image value array */
    void getImageData(ImageInfo *images) const { imageSet->getData(images); }

    static const uint32_t magic_begin = TO_MAGIC('K', 'E', 'R', 'N');
    static const uint32_t magic_end = TO_MAGIC('N', 'R', 'E', 'K');

    /* format:
       magic_begin | name_size | name | arg_num | args | PatchInfo_num |
       PatchInfo | curbeSize | simdWidth | stackSize | scratchSize | useSLM |
       slmSize | samplers | images | code_size | code | magic_end */

    /*! Implements the serialization. */
    virtual size_t serializeToBin(std::ostream& outs);
    virtual size_t deserializeFromBin(std::istream& ins);
    virtual void printStatus(int indent, std::ostream& outs);

  protected:
    friend class Context;      //!< Owns the kernels
    std::string name;          //!< Kernel name
    KernelArgument *args;      //!< Each argument
    vector patches; //!< Indicates how to build the curbe
    uint32_t argNum;           //!< Number of function arguments
    uint32_t curbeSize;        //!< Size of the data to push
    uint32_t simdWidth;        //!< SIMD size for the kernel (lane number)
    uint32_t stackSize;        //!< Stack size (may be 0 if unused)
    uint32_t scratchSize;      //!< Scratch memory size (may be 0 if unused)
    bool useSLM;               //!< SLM requires a special HW config
    uint32_t slmSize;          //!< SLM size for kernel variables
    Context *ctx;              //!< Context kept after compilation; used to allocate the constant buffer curbe
    ir::SamplerSet *samplerSet;//!< Copy from the corresponding function.
    ir::ImageSet *imageSet;    //!< Copy from the corresponding function.
    GBE_CLASS(Kernel);         //!< Use custom allocators
  };

  /*! Describe a compiled program */
  class Program : public NonCopyable, public Serializable
  {
  public:
    /*! Create an empty program */
    Program(void);
    /*! Destroy the program */
    virtual ~Program(void);
    /*! Get the number of kernels in the program */
    uint32_t getKernelNum(void) const { return kernels.size(); }
    /*! 
Get the kernel from its name (NULL if no kernel has that name) */
    Kernel *getKernel(const std::string &name) const {
      auto it = kernels.find(name);
      if (it == kernels.end())
        return NULL;
      else
        return it->second;
    }
    /*! Get the kernel from its ID. The ID is the position of the kernel in the
     *  iteration order of the kernel map; NULL if ID is out of range */
    Kernel *getKernel(uint32_t ID) const {
      uint32_t currID = 0;
      Kernel *kernel = NULL;
      // Linear walk: stop at the ID-th entry
      for (const auto &pair : kernels) {
        if (currID == ID) {
          kernel = pair.second;
          break;
        }
        currID++;
      }
      return kernel;
    }
    /*! Build a program from a ir::Unit */
    bool buildFromUnit(const ir::Unit &unit, std::string &error);
    /*! Builds a program from a LLVM source code */
    bool buildFromLLVMFile(const char *fileName, std::string &error);
    /*! Builds a program from a OCL string */
    bool buildFromSource(const char *source, std::string &error);
    /*! Get size of the global constant arrays (constantSet is dereferenced
     *  unchecked) */
    size_t getGlobalConstantSize(void) const { return constantSet->getDataSize(); }
    /*! Get the content of global constant arrays */
    void getGlobalConstantData(char *mem) const { constantSet->getData(mem); }

    static const uint32_t magic_begin = TO_MAGIC('P', 'R', 'O', 'G');
    static const uint32_t magic_end = TO_MAGIC('G', 'O', 'R', 'P');

    /* format:
       magic_begin | constantSet_flag | constSet_data | kernel_num |
       kernel_1 | ........ | kernel_n | magic_end | total_size */

    /*! Implements the serialization. */
    virtual size_t serializeToBin(std::ostream& outs);
    virtual size_t deserializeFromBin(std::istream& ins);
    virtual void printStatus(int indent, std::ostream& outs);

  protected:
    /*! Compile a kernel */
    virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name) = 0;
    /*! Allocate an empty kernel. */
    virtual Kernel *allocateKernel(const std::string &name) = 0;
    /*! Kernels indexed by their name (hash map, so no particular order) */
    hash_map kernels;
    /*! Global (constants) outside any kernel */
    ir::ConstantSet *constantSet;
    /*! 
Use custom allocators */ GBE_CLASS(Program); }; } /* namespace gbe */ #endif /* __GBE_PROGRAM_HPP__ */ Release_v0.3/backend/src/builtin_vector_proto.def000066400000000000000000000214721223142177000223220ustar00rootroot00000000000000##math gentype acos (gentype) gentype acosh (gentype) gentype acospi (gentype x) gentype asin (gentype) gentype asinh (gentype) gentype asinpi (gentype x) gentype atan (gentype y_over_x) gentype atan2 (gentype y, gentype x) gentype atanh (gentype) gentype atanpi (gentype x) gentype atan2pi (gentype y, gentype x) gentype cbrt (gentype) gentype ceil (gentype) gentype copysign (gentype x, gentype y) gentype cos (gentype) gentype cosh (gentype) gentype cospi (gentype x) gentype erfc (gentype) gentype erf (gentype) gentype exp (gentype x) gentype exp2 (gentype) gentype exp10 (gentype) gentype expm1 (gentype x) gentype fabs (gentype) gentype fdim (gentype x, gentype y) gentype floor (gentype) # XXX we use madd for fma #gentype fma (gentype a, gentype b, gentype c) gentype fmax (gentype x, gentype y) gentypef fmax (gentypef x, float y) gentyped fmax (gentyped x, double y) gentype fmin (gentype x, gentype y) gentypef fmin (gentypef x, float y) gentyped fmin (gentyped x, double y) gentype fmod (gentype x, gentype y) gentype fract (gentype x, __global gentype *iptr) gentype fract (gentype x, __local gentype *iptr) gentype fract (gentype x, __private gentype *iptr) floatn frexp (floatn x, __global intn *exp) floatn frexp (floatn x, __local intn *exp) floatn frexp (floatn x, __private intn *exp) float frexp (float x, __global int *exp) float frexp (float x, __local int *exp) float frexp (float x, __private int *exp) doublen frexp (doublen x, __global intn *exp) doublen frexp (doublen x, __local intn *exp) doublen frexp (doublen x, __private intn *exp) double frexp (double x, __global int *exp) double frexp (double x, __local int *exp) double frexp (double x, __private int *exp) gentype hypot (gentype x, gentype y) intn ilogb (floatn x) int ilogb 
(float x) intn ilogb (doublen x) int ilogb (double x) floatn ldexp (floatn x, intn k) floatn ldexp (floatn x, int k) float ldexp (float x, int k) doublen ldexp (doublen x, intn k) doublen ldexp (doublen x, int k) double ldexp (double x, int k) gentype lgamma (gentype x) floatn lgamma_r (floatn x, __global intn *signp) floatn lgamma_r (floatn x, __local intn *signp) floatn lgamma_r (floatn x, __private intn *signp) float lgamma_r (float x, __global int *signp) float lgamma_r (float x, __local int *signp) float lgamma_r (float x, __private int *signp) #doublen lgamma_r (doublen x, __global intn *signp) #doublen lgamma_r (doublen x, __local intn *signp) #doublen lgamma_r (doublen x, __private intn *signp) #double lgamma_r (double x, __global int *signp) #double lgamma_r (double x, __local int *signp) #double lgamma_r (double x, __private int *signp) gentype log (gentype) gentype log2 (gentype) gentype log10 (gentype) gentype log1p (gentype x) gentype logb (gentype x) gentype mad (gentype a, gentype b, gentype c) gentype maxmag (gentype x, gentype y) gentype minmag (gentype x, gentype y) gentype modf (gentype x, __global gentype *iptr) gentype modf (gentype x, __local gentype *iptr) gentype modf (gentype x, __private gentype *iptr) floatn nan (uintn nancode) float nan (uint nancode) doublen nan (ulongn nancode) double nan (ulong nancode) gentype nextafter (gentype x, gentype y) gentype pow (gentype x, gentype y) floatn pown (floatn x, intn y) float pown (float x, int y) doublen pown (doublen x, intn y) double pown (double x, int y) #XXX we define powr as pow #gentype powr (gentype x, gentype y) gentype remainder (gentype x, gentype y) floatn remquo (floatn x, floatn y, __global intn *quo) floatn remquo (floatn x, floatn y, __local intn *quo) floatn remquo (floatn x, floatn y, __private intn *quo) float remquo (float x, float y, __global int *quo) float remquo (float x, float y, __local int *quo) float remquo (float x, float y, __private int *quo) doublen remquo 
(doublen x, doublen y, __global intn *quo) doublen remquo (doublen x, doublen y, __local intn *quo) doublen remquo (doublen x, doublen y, __private intn *quo) double remquo (double x, double y, __global int *quo) double remquo (double x, double y, __local int *quo) double remquo (double x, double y, __private int *quo) gentype rint (gentype) floatn rootn (floatn x, intn y) doublen rootn (doublen x, intn y) doublen rootn (double x, int y) gentype round (gentype x) gentype rsqrt (gentype) gentype sin (gentype) gentype sincos (gentype x, __global gentype *cosval) gentype sincos (gentype x, __local gentype *cosval) gentype sincos (gentype x, __private gentype *cosval) gentype sinh (gentype) gentype sinpi (gentype x) gentype sqrt (gentype) gentype tan (gentype) gentype tanh (gentype) gentype tanpi (gentype x) gentype tgamma (gentype) gentype trunc (gentype) ##half_native_math #gentype half_cos (gentype x) #gentype half_divide (gentype x, gentype y) #gentype half_exp (gentype x) #gentype half_exp2 (gentype x) #gentype half_exp10 (gentype x) #gentype half_log (gentype x) #gentype half_log2 (gentype x) #gentype half_log10 (gentype x) #gentype half_powr (gentype x, gentype y) #gentype half_recip (gentype x) #gentype half_rsqrt (gentype x) #gentype half_sin (gentype x) #gentype half_sqrt (gentype x) #gentype half_tan (gentype x) # XXX we already defined all native and non-native # functions to the same one. 
#gentype native_cos (gentype x) #gentype native_divide (gentype x, gentype y) #gentype native_exp (gentype x) #gentype native_exp2 (gentype x) #gentype native_exp10 (gentype x) #gentype native_log (gentype x) #gentype native_log2 (gentype x) #gentype native_log10 (gentype x) #gentype native_powr (gentype x, gentype y) gentype native_recip (gentype x) #gentype native_rsqrt (gentype x) #gentype native_sin (gentype x) #gentype native_sqrt (gentype x) #gentype native_tan (gentype x) ##integer ugentype abs (gentype x) ugentype abs_diff (gentype x, gentype y) gentype add_sat (gentype x, gentype y) gentype hadd (gentype x, gentype y) gentype rhadd (gentype x, gentype y) gentype clamp (gentype x, gentype minval, gentype maxval) gentype clamp (gentype x, sgentype minval, sgentype maxval) gentype clz (gentype x) gentype mad_hi (gentype a, gentype b, gentype c) gentype mad_sat (gentype a, gentype b, gentype c) gentype max (gentype x, gentype y) gentype max (gentype x, sgentype y) gentype min (gentype x, gentype y) gentype min (gentype x, sgentype y) gentype mul_hi (gentype x, gentype y) gentype rotate (gentype v, gentype i) gentype sub_sat (gentype x, gentype y) shortn upsample (charn hi, ucharn lo) ushortn upsample (ucharn hi, ucharn lo) intn upsample (shortn hi, ushortn lo) uintn upsample (ushortn hi, ushortn lo) longn upsample (intn hi, uintn lo) ulongn upsample (uintn hi, uintn lo) # XXX not implemented #gentype popcount (gentype x) ##fast_integer gentype mad24 (gentype x, gentype y, gentype z) gentype mul24 (gentype x, gentype y) ##common gentype clamp (gentype x, gentype minval, gentype maxval) gentypef clamp (gentypef x, float minval, float maxval) gentyped clamp (gentyped x, double minval, double maxval) gentype degrees (gentype radians) gentype max (gentype x, gentype y) gentypef max (gentypef x, float y) gentyped max (gentyped x, double y) gentype min (gentype x, gentype y) gentypef min (gentypef x, float y) gentyped min (gentyped x, double y) gentype mix (gentype 
x, gentype y, gentype a) gentypef mix (gentypef x, gentypef y, float a) gentyped mix (gentyped x, gentyped y, double a) gentype radians (gentype degrees) gentype step (gentype edge, gentype x) gentypef step (float edge, gentypef x) gentyped step (double edge, gentyped x) gentype smoothstep (gentype edge0, gentype edge1, gentype x) gentypef smoothstep (float edge0, float edge1, gentypef x) gentyped smoothstep (double edge0, double edge1, gentyped x) gentype sign (gentype x) ##relational intn isequal (floatn x, floatn y) longn isequal (doublen x, doublen y) intn isnotequal (floatn x, floatn y) longn isnotequal (doublen x, doublen y) intn isgreater (floatn x, floatn y) longn isgreater (doublen x, doublen y) intn isgreaterequal (floatn x, floatn y) longn isgreaterequal (doublen x, doublen y) intn isless (floatn x, floatn y) longn isless (doublen x, doublen y) intn islessequal (floatn x, floatn y) longn islessequal (doublen x, doublen y) # XXX not implemented intn islessgreater (floatn x, floatn y) longn islessgreater (doublen x, doublen y) intn isfinite (floatn longn isfinite (doublen) intn isinf (floatn) longn isinf (doublen) intn isnan (floatn) longn isnan (doublen) intn isnormal (floatn) longn isnormal (doublen) # XXX not implemented intn isordered (floatn x, floatn y) longn isordered (doublen x, doublen y) # XXX not implemented intn isunordered (floatn x, floatn y) longn isunordered (doublen x, doublen y) intn signbit (floatn) longn signbit (doublen) int any (igentype x) int all (igentype x) # XXX need to revisit select latter #gentype bitselect (gentype a, gentype b, gentype c) gentype select (gentype a, gentype b, igentype c) gentype select (gentype a, gentype b, ugentype c) ##misc #gentypen shuffle (gentypem x, ugentypen mask) #gentypen shuffle2 (gentypem x, gentypem y, ugentypen mask) Release_v0.3/backend/src/gbe_bin_generater.cpp000066400000000000000000000226201223142177000215100ustar00rootroot00000000000000/* * Copyright © 2013 Intel Corporation * * This 
library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This library is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this library. If not, see . * */ /******************************************************************************* This file is used to generating the gbe kernel binary. These binary may be used in CL API, such as enqueue memory We generate the binary in build time to improve the performance. *******************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "backend/program.h" #include "backend/program.hpp" using namespace std; #define FILE_NOT_FIND_ERR 1 #define FILE_MAP_ERR 2 #define FILE_BUILD_FAILED 3 #define FILE_SERIALIZATION_FAILED 4 class program_build_instance { protected: string prog_path; string build_opt; static string bin_path; static bool str_fmt_out; int fd; int file_len; const char* code; gbe::Program* gbe_prog; public: program_build_instance (void) : fd(-1), file_len(0), code(NULL), gbe_prog(NULL) { } explicit program_build_instance (const char* file_path, const char* option = NULL) : prog_path(file_path), build_opt(option), fd(-1), file_len(0), code(NULL), gbe_prog(NULL) { } ~program_build_instance () { if (code) { munmap((void *)(code), file_len); code = NULL; } if (fd >= 0) close(fd); if (gbe_prog) gbe_program_delete(reinterpret_cast(gbe_prog)); } program_build_instance(program_build_instance&& other) = default; #if 0 { 
#define SWAP(ELT) \ do { \ auto elt = this->ELT; \ this->ELT = other.ELT; \ other.ELT = elt; \ } while(0) SWAP(fd); SWAP(code); SWAP(file_len); SWAP(prog_path); SWAP(build_opt); #undef SWAP } #endif explicit program_build_instance(const program_build_instance& other) = delete; program_build_instance& operator= (const program_build_instance& other) { /* we do not want to be Lvalue copied, but operator is needed to instance the template of vector. */ assert(1); return *this; } const char* file_map_open (void) throw (int); const char* get_code (void) { return code; } const string& get_program_path (void) { return prog_path; } int get_size (void) { return file_len; } void print_file (void) { cout << code << endl; } void dump (void) { cout << "program path: " << prog_path << endl; cout << "Build option: " << build_opt << endl; print_file(); } static void set_str_fmt_out (bool flag) { str_fmt_out = flag; } static int set_bin_path (const char* path) { if (bin_path.size()) return 0; bin_path = path; return 1; } void build_program(void) throw(int); void serialize_program(void) throw(int); }; string program_build_instance::bin_path; bool program_build_instance::str_fmt_out = false; void program_build_instance::serialize_program(void) throw(int) { ofstream ofs; ostringstream oss; size_t sz; ofs.open(bin_path, ofstream::out | ofstream::app | ofstream::binary); if (str_fmt_out) { string array_name = "Unkown_name_array"; unsigned long last_slash = bin_path.rfind("/"); unsigned long last_dot = bin_path.rfind("."); if (last_slash != string::npos && last_dot != string::npos) array_name = bin_path.substr(last_slash + 1, last_dot - 1 - last_slash); ofs << "char " << array_name << "[] = {" << "\n"; sz = gbe_prog->serializeToBin(oss); for (size_t i = 0; i < sz; i++) { unsigned char c = oss.str().c_str()[i]; char asic_str[9]; sprintf(asic_str, "%2.2x", c); ofs << "0x"; ofs << asic_str << ((i == sz - 1) ? 
"" : ", "); } ofs << "};\n"; string array_size = array_name + "_size"; ofs << "int " << array_size << " = " << sz << ";" << "\n"; } else { sz = gbe_prog->serializeToBin(ofs); } ofs.close(); if (!sz) { throw FILE_SERIALIZATION_FAILED; } } void program_build_instance::build_program(void) throw(int) { gbe_program opaque = gbe_program_new_from_source(code, 0, build_opt.c_str(), NULL, NULL); if (!opaque) throw FILE_BUILD_FAILED; gbe_prog = reinterpret_cast(opaque); assert(gbe_program_get_kernel_num(opaque)); } const char* program_build_instance::file_map_open(void) throw(int) { void * address; /* Open the file */ fd = ::open(prog_path.c_str(), O_RDONLY); if (fd < 0) { throw FILE_NOT_FIND_ERR; } /* Map it */ file_len = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); address = mmap(0, file_len, PROT_READ, MAP_SHARED, fd, 0); if (address == NULL) { throw FILE_MAP_ERR; } code = reinterpret_cast(address); return code; } typedef vector prog_vector; int main (int argc, const char **argv) { prog_vector prog_insts; vector argv_saved; const char* build_opt; const char* file_path; int i; int oc; deque used_index; if (argc < 2) { cout << "Usage: kernel_path [-pbuild_parameter]\n[-obin_path]" << endl; return 0; } used_index.assign(argc, 0); /* because getopt will re-sort the argv, so we save here. 
*/ for (i=0; i< argc; i++) { argv_saved.push_back(string(argv[i])); } while ( (oc = getopt(argc, (char * const *)argv, "o:p:s")) != -1 ) { switch (oc) { case 'p': { int opt_index; if (argv[optind-1][0] == '-') {// -pXXX like opt_index = optind - 1; } else { // Must be -p XXXX mode opt_index = optind - 2; used_index[opt_index + 1] = 1; } /* opt must follow the file name.*/ if ((opt_index < 2 ) || argv[opt_index-1][0] == '-') { cout << "Usage note: Building option must follow file name" << endl; return 1; } file_path = argv[opt_index - 1]; build_opt = optarg; prog_insts.push_back(program_build_instance(file_path, build_opt)); break; } case 'o': if (!program_build_instance::set_bin_path(optarg)) { cout << "Can not specify the bin path more than once." << endl; return 1; } used_index[optind-1] = 1; break; case 's': program_build_instance::set_str_fmt_out(true); used_index[optind-1] = 1; break; case ':': cout << "Miss the file option argument" << endl; return 1; default: cout << "Unknown opt" << endl; } } for (i=1; i < argc; i++) { //cout << argv_saved[i] << endl; if (argv_saved[i].size() && argv_saved[i][0] != '-') { if (used_index[i]) continue; string file_name = argv_saved[i]; prog_vector::iterator result = find_if(prog_insts.begin(), prog_insts.end(), [&](program_build_instance & prog_inst)-> bool { bool result = false; if (prog_inst.get_program_path() == file_name) result = true; return result; }); if (result == prog_insts.end()) { prog_insts.push_back(program_build_instance(file_name.c_str(), "")); } } } for (auto& inst : prog_insts) { try { inst.file_map_open(); inst.build_program(); inst.serialize_program(); } catch (int & err_no) { if (err_no == FILE_NOT_FIND_ERR) { cout << "can not open the file " << inst.get_program_path() << endl; } else if (err_no == FILE_MAP_ERR) { cout << "map the file " << inst.get_program_path() << " failed" << endl; } else if (err_no == FILE_BUILD_FAILED) { cout << "build the file " << inst.get_program_path() << " failed" << endl; } 
#! /bin/sh -e
#
# Prints, on standard output, the unions and the as_<type>() reinterpretation
# functions for every pair of types of equal byte size. TYPES and
# VECTOR_LENGTHS come from genconfig.sh, which is sourced from the current
# directory.

. ./genconfig.sh

# Generate list of union sizes
for type in $TYPES; do
        # Each TYPES entry is "name:size". With IFS=':' the unquoted $type is
        # split into positional parameters: $1=dummy, $2=name, $3=size.
        size=`IFS=:; set -- dummy $type; echo $3`
        for vector_length in $VECTOR_LENGTHS; do
                union_sizes="$union_sizes `expr $vector_length \* $size`"
        done
done
# One size per line, sorted numerically, duplicates removed.
union_sizes="`echo $union_sizes | tr ' ' '\n' | sort -n | uniq`"

# For each union size
for union_size in $union_sizes; do

        # Define an union that contains all vector types that have the same size as the union
        unionname="union _type_cast_${union_size}_b"
        echo "$unionname {"
        for type in $TYPES; do
                basetype=`IFS=:; set -- dummy $type; echo $2`
                basesize=`IFS=:; set -- dummy $type; echo $3`
                for vector_length in $VECTOR_LENGTHS; do
                        vector_size_in_union="`expr $vector_length \* $basesize`"
                        if test $union_size -ne $vector_size_in_union; then
                                continue
                        fi
                        # A vector length of 1 is the scalar type itself.
                        if test $vector_length -eq 1; then
                                vectortype=$basetype
                        else
                                vectortype=$basetype$vector_length
                        fi
                        echo " $vectortype _$vectortype;"
                done
        done
        echo "};"
        echo

        # For each tuple of vector types that has the same size as the current union size,
        # define an as_* function that converts types without changing binary representation.
        for ftype in $TYPES; do
                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
                fbasesize=`IFS=:; set -- dummy $ftype; echo $3`
                for fvector_length in $VECTOR_LENGTHS; do
                        fvector_size_in_union="`expr $fvector_length \* $fbasesize`"
                        if test $union_size -ne $fvector_size_in_union; then
                                continue
                        fi
                        if test $fvector_length -eq 1; then
                                fvectortype=$fbasetype
                        else
                                fvectortype=$fbasetype$fvector_length
                        fi
                        for ttype in $TYPES; do
                                tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
                                tbasesize=`IFS=:; set -- dummy $ttype; echo $3`
                                # No as_* function between vectors of the same base type.
                                if test $fbasetype = $tbasetype; then
                                        continue
                                fi
                                for tvector_length in $VECTOR_LENGTHS; do
                                        tvector_size_in_union="`expr $tvector_length \* $tbasesize`"
                                        if test $union_size -ne $tvector_size_in_union; then
                                                continue
                                        fi
                                        if test $tvector_length -eq 1; then
                                                tvectortype=$tbasetype
                                        else
                                                tvectortype=$tbasetype$tvector_length
                                        fi
                                        # Write through the source member, read back through the target member.
                                        echo "INLINE OVERLOADABLE $tvectortype as_$tvectortype($fvectortype v) {"
                                        echo " $unionname u;"
                                        echo " u._$fvectortype = v;"
                                        echo " return u._$tvectortype;"
                                        echo "}"
                                        echo
                                done
                        done
                done
        done

done
import re
import sys
import os

# The script takes exactly two arguments: the prototype spec file to read and
# the header file to write.
if len(sys.argv) != 3:
    print("Invalid argument {}".format(sys.argv))
    print("use {} spec_file_name output_file_name".format(sys.argv[0]))
    # A bare "raise" with no active exception only produced an unrelated
    # error; exit with a failure status instead.
    sys.exit(1)

all_vector = 1,2,3,4,8,16

# generate generic type sets
def gen_vector_type(type_set, vector_set = all_vector):
    """Return every (base type, width) pair, grouped by base type.

    type_set   -- iterable of base type names
    vector_set -- iterable of vector widths; 1 stands for the scalar type
    """
    return [(t, i) for t in type_set for i in vector_set]

def set_vector_memspace(vector_type_set, memspace):
    """Turn a list of (type, width) pairs into pointer types.

    Returns vector_type_set itself when memspace is the empty string,
    otherwise a new list of (type, width, memspace) triples.
    """
    if memspace == '':
        return vector_type_set
    return [(t[0], t[1], memspace) for t in vector_type_set]

# if we have 3 elements in the type tuple, we are a pointer with a memory space type
# at the third element.
def isPointer(t):
    """Return True when the type tuple carries a memory space (3 elements)."""
    return len(t) == 3

all_itype = "char","short","int","long"
all_utype = "uchar","ushort","uint","ulong"
all_int_type = all_itype + all_utype

all_float_type = "float","double"
all_type = all_int_type + all_float_type

# all vector/scalar types
# For every base type X two module-level names are created:
#   Xn = ["Xn", all widths of X]      (for example floatn)
#   sX = ["X",  only the scalar X]    (for example sfloat)
for t in all_type:
    globals()["{0}n".format(t)] = ["{0}n".format(t), gen_vector_type([t])]
    globals()["s{0}".format(t)] = [t, gen_vector_type([t], [1])]

# Predefined type sets according to the Open CL spec.
def _named_type_set(name, base_types, widths = all_vector):
    """Return [name, list of (type, width) pairs] for one spec type name."""
    return [name, gen_vector_type(base_types, widths)]

# Widths usable by the shuffle-style "misc" builtins (no scalar, no 3-vector).
_misc_widths = [2, 4, 8, 16]

# math builtins
math_gentype = _named_type_set("math_gentype", all_float_type)
math_gentypef = _named_type_set("math_gentypef", ["float"])
math_gentyped = _named_type_set("math_gentyped", ["double"])

half_native_math_gentype = _named_type_set("half_native_math_gentype", ["float"])

# integer builtins; the "s" variant only holds the scalar types
integer_gentype = _named_type_set("integer_gentype", all_int_type)
integer_ugentype = _named_type_set("integer_ugentype", all_utype)
integer_sgentype = _named_type_set("integer_sgentype", all_int_type, [1])

fast_integer_gentype = _named_type_set("fast_integer_gentype", ["uint", "int"])

# common builtins
common_gentype = _named_type_set("common_gentype", all_float_type)
common_gentypef = _named_type_set("common_gentypef", ["float"])
common_gentyped = _named_type_set("common_gentyped", ["double"])

# relational builtins
relational_gentype = _named_type_set("relational_gentype", all_type)
relational_igentype = _named_type_set("relational_igentype", all_itype)
relational_ugentype = _named_type_set("relational_ugentype", all_utype)

# misc builtins
misc_gentypem = _named_type_set("misc_gentypem", all_type, _misc_widths)
misc_gentypen = _named_type_set("misc_gentypen", all_type, _misc_widths)
misc_ugentypem = _named_type_set("misc_ugentypem", all_utype, _misc_widths)
misc_ugentypen = _named_type_set("misc_ugentypen", all_utype, _misc_widths)

# Every type name a spec line may use, in registration order.
all_predefined_type = (
    math_gentype, math_gentypef, math_gentyped,
    half_native_math_gentype, integer_gentype, integer_sgentype,
    integer_ugentype, charn, ucharn, shortn, ushortn, intn,
    uintn, longn, ulongn, floatn, doublen,
    fast_integer_gentype, common_gentype, common_gentypef,
    common_gentyped, relational_gentype, relational_igentype,
    relational_ugentype, schar, suchar, sshort, sint, suint,
    slong, sulong, sfloat, sdouble, misc_gentypem,
    misc_ugentypem, misc_gentypen, misc_ugentypen,
)

# type dictionary contains all the predefined type sets.
# Maps a type name used in the spec to its list of (type, width) pairs.
type_dict = {}

for t in all_predefined_type:
    type_dict[t[0]] = t[1]

def _prefix(prefix, dtype):
    """Qualify a generic type name with its section prefix.

    Names containing "gentype" become "<prefix>_<dtype>"; concrete names such
    as "floatn" are returned unchanged.
    """
    if "gentype" in dtype:
        return prefix + '_' + dtype
    return dtype

memspaces = ["__local ", "__private ", "__global "]

def stripMemSpace(t):
    """Split a leading memory space qualifier off a type string.

    Returns (memspace, type); memspace keeps its trailing blank and is the
    empty string when t carries no known qualifier.
    """
    if t.startswith('__'):
        for memspace in memspaces:
            if t.startswith(memspace):
                return memspace, t[len(memspace):]
    return '', t

def check_type(types):
    """Raise ValueError if any type name in types is not in type_dict.

    The offending name is also printed, as before. Raising a plain string is
    itself a TypeError, which used to hide the intended message.
    """
    for t in types:
        memspace, t = stripMemSpace(t)
        if t not in type_dict:
            print(t)
            raise ValueError("found invalid type: {}".format(t))

def match_unsigned(dtype):
    """Return the unsigned integer type of the same element size and width."""
    if dtype[0] == 'float':
        return ["uint", dtype[1]]
    if dtype[0] == 'double':
        return ["ulong", dtype[1]]
    if dtype[0][0] == 'u':
        return dtype
    return ['u' + dtype[0], dtype[1]]

def match_signed(dtype):
    """Return the signed integer type of the same element size and width."""
    if dtype[0] == 'float':
        return ["int", dtype[1]]
    if dtype[0] == 'double':
        return ["long", dtype[1]]
    if dtype[0][0] != 'u':
        return dtype
    return [dtype[0][1:], dtype[1]]

def match_scalar(dtype):
    """Return the scalar (width 1) version of dtype."""
    return [dtype[0], 1]

# The dstType is the expected type, srcType is
# the reference type. Sometimes, the dstType and
# srcType are different. We need to fix this issue
# and return correct dst type.
def fixup_type(dstType, srcType, n):
    """Return the n-th concrete type of dstType, reconciled with srcType.

    dstType -- type list whose entry is wanted
    srcType -- type list of the value it is paired with
    n       -- index of the overload being generated
    Raises TypeError when the two lists cannot be paired.
    """
    if dstType == srcType:
        return dstType[n]

    # scalar dst type
    if len(dstType) == 1:
        return dstType[0]
    # dst is not scalar but src is scalar
    if len(srcType) == 1:
        return dstType[n]

    if dstType == integer_sgentype[1] and srcType == integer_gentype[1]:
        return match_scalar(srcType[n])

    if dstType == integer_gentype[1] and \
       (srcType == integer_sgentype[1] or \
        srcType == integer_ugentype[1]):
        return dstType[n]

    if dstType == integer_ugentype[1] and srcType == integer_gentype[1]:
        return match_unsigned(srcType[n])

    if dstType == relational_igentype[1] and srcType == relational_gentype[1]:
        return match_signed(srcType[n])
    if dstType == relational_ugentype[1] and srcType == relational_gentype[1]:
        return match_unsigned(srcType[n])

    if dstType == relational_gentype[1] and \
       (srcType == relational_igentype[1] or \
        srcType == relational_ugentype[1]):
        return dstType[n]

    if len(dstType) == len(srcType):
        return dstType[n]

    print("{} {}".format(dstType, srcType))
    raise TypeError("type mismatch")

class builtinProto():
    """One builtin prototype from the spec and the text generated for it."""
    valueTypeStr = ""
    functionName = ""
    paramTypeStrs = []
    paramCount = 0
    outputStr = []
    prefix = ""

    def init(self, sectionHeader, sectionPrefix):
        """Reset the state for a new spec line.

        A non-empty sectionHeader becomes the first output line; a non-empty
        sectionPrefix replaces the prefix used for generic type names.
        """
        self.valueTypeStr = ""
        self.functionName = ""
        self.paramTypeStrs = []
        self.paramCount = 0
        if sectionHeader != "":
            self.outputStr = [sectionHeader]
        else:
            self.outputStr = []
        if sectionPrefix != "":
            self.prefix = sectionPrefix
        self.indent = 0

    def append(self, line, nextInit = ""):
        """Store a finished output line and return the start of the next one."""
        self.outputStr.append(line)
        return nextInit

    def indentSpace(self):
        """Return self.indent blanks."""
        return ' ' * self.indent

    def init_from_line(self, t):
        """Parse one prototype line: return type, name, then parameter types."""
        self.append('//{}'.format(t))
        # Build a real list: filter() returns an iterator on Python 3 and the
        # tokens are indexed below.
        line = [tok for tok in re.split(r',| |\(', t.rstrip(')\n')) if tok]
        self.paramCount = 0
        stripped = 0
        memSpace = ''
        for i, text in enumerate(line):
            # idx counts tokens without the memory space qualifiers.
            idx = i - stripped
            if idx == 0:
                self.valueTypeStr = _prefix(self.prefix, line[i])
                continue

            if idx == 1:
                self.functionName = line[i]
                continue

            # Even positions hold parameter types, odd ones parameter names.
            if idx % 2 == 0:
                if line[i][0] == '(':
                    tmpType = line[i][1:]
                else:
                    tmpType = line[i]
                if tmpType == '__local' or \
                   tmpType == '__private' or \
                   tmpType == '__global':
                    memSpace = tmpType + ' '
                    stripped += 1
                    continue
                self.paramTypeStrs.append(memSpace + _prefix(self.prefix, tmpType))
                memSpace = ''
                self.paramCount += 1

    def gen_proto_str_1(self, vtypeSeq, ptypeSeqs, i):
        """Generate the i-th vector overload as calls to the scalar builtin.

        Returns None without output for scalar overloads and for anything
        involving double; otherwise returns the last generated line.
        """
        for n in range(0, self.paramCount):
            ptype = fixup_type(ptypeSeqs[n], vtypeSeq, i)
            vtype = fixup_type(vtypeSeq, ptypeSeqs[n], i)
            # XXX FIXME now skip all double vector, as we don't
            # defined those scalar version's prototype.
            if ptype[0].find('double') != -1 or \
               vtype[0].find('double') != -1:
                return

            if n == 0:
                formatStr = 'INLINE_OVERLOADABLE {}{} {} ('.format(vtype[0], vtype[1], self.functionName)
            else:
                formatStr += ', '

            # The scalar version is the builtin itself.
            if vtype[1] == 1:
                return

            if isPointer(ptype):
                formatStr += ptype[2]
                pointerStr = '*'
            else:
                pointerStr = ''

            if ptype[1] != 1:
                formatStr += '{}{} {}param{}'.format(ptype[0], ptype[1], pointerStr, n)
            else:
                formatStr += '{} {}param{}'.format(ptype[0], pointerStr, n)

        formatStr += ')'
        formatStr = self.append(formatStr, '{{return ({}{})('.format(vtype[0], vtype[1]))
        self.indent = len(formatStr)
        for j in range(0, vtype[1]):
            if j != 0:
                formatStr += ','
                if (j + 1) % 2 == 0:
                    formatStr += ' '
                # Two scalar calls per output line.
                if j % 2 == 0:
                    formatStr = self.append(formatStr, self.indentSpace())

            formatStr += '{}('.format(self.functionName)
            for n in range(0, self.paramCount):
                if n != 0:
                    formatStr += ', '

                ptype = fixup_type(ptypeSeqs[n], vtypeSeq, i)
                vtype = fixup_type(vtypeSeq, ptypeSeqs[n], i)
                if vtype[1] != ptype[1]:
                    # A scalar parameter is passed unchanged to every call.
                    if ptype[1] != 1:
                        raise ValueError("parameter is not a scalar but has different width with result value.")
                    if isPointer(ptype):
                        formatStr += '&'
                    formatStr += 'param{}'.format(n)
                    continue

                if isPointer(ptype):
                    formatStr += '({} {} *)param{} + {:2d}'.format(ptype[2], ptype[0], n, j)
                else:
                    if self.functionName == 'select' and n == 2:
                        # Only the top bit of each component of the third
                        # argument is passed on to the scalar select.
                        formatStr += '({0})(param{1}.s{2:x} & (({0})1 << (sizeof({0})*8 - 1)))'.format(ptype[0], n, j)
                    else:
                        formatStr += 'param{}.s{:x}'.format(n, j)

            formatStr += ')'

        formatStr += '); }\n'
        self.append(formatStr)

        return formatStr

    def output(self, outFile = None):
        """Write the generated lines to outFile, or print them when it is None."""
        for line in self.outputStr:
            if outFile is None:
                print(line)
            else:
                outFile.write('{}\n'.format(line))

    def gen_proto_str(self):
        """Generate every overload of the prototype parsed by init_from_line."""
        check_type([self.valueTypeStr] + self.paramTypeStrs)
        vtypeSeq = type_dict[self.valueTypeStr]
        ptypeSeqs = []
        count = len(vtypeSeq)
        for t in self.paramTypeStrs:
            memspace, t = stripMemSpace(t)
            ptypeSeqs.append(set_vector_memspace(type_dict[t], memspace))
            count = max(count, len(type_dict[t]))

        for i in range(count):
            self.gen_proto_str_1(vtypeSeq, ptypeSeqs, i)

        self.append("")

def safeUnlink(filename):
    """Remove filename, ignoring the error if it cannot be removed."""
    try:
        os.remove(filename)
    except OSError:
        pass

# save the prototypes into ocl_vector.h
headerFileName = sys.argv[2]
tempHeaderFileName = sys.argv[2] + '.tmp'
safeUnlink(headerFileName)
# The header is written under a temporary name and renamed once complete.
specFile = open(sys.argv[1], 'r')
tempHeader = open(tempHeaderFileName, 'w')
try:
    tempHeader.write("//This file is autogenerated by {}.\n".format(sys.argv[0]))
    tempHeader.write("//Don't modify it manually.\n")

    functionProto = builtinProto()
    # Defined up front so a prototype placed before the first "##" section
    # line is reported by check_type instead of failing on an unbound name.
    sectionHeader = ""
    sectionPrefix = ""
    for line in specFile:
        if line.isspace():
            continue
        if line[0] == '#':
            # "##name" opens a section, a single "#" is a comment.
            if line[1] == '#':
                sectionHeader = "//{} builtin functions".format(line[2:].rstrip())
                sectionPrefix = (line[2:].split())[0]
            continue
        functionProto.init(sectionHeader, sectionPrefix)
        # Header and prefix are only handed over once per section; the proto
        # object keeps the prefix until the next section sets a new one.
        sectionHeader = ""
        sectionPrefix = ""
        functionProto.init_from_line(line)
        functionProto.gen_proto_str()
        functionProto.output(tempHeader)
finally:
    tempHeader.close()
    specFile.close()

os.rename(tempHeaderFileName, headerFileName)
#! /bin/sh -e
#
# Prints, on standard output, the convert_<type>() and convert_<type>_sat()
# functions for every type in TYPES and every length in VECTOR_LENGTHS, both
# taken from genconfig.sh, which is sourced from the current directory.

. ./genconfig.sh

# Print the component suffixes of a vector with $1 elements, for example
# "0 1 2" for 3 and "0 1 ... F" for 16.
vector_lanes() {
        echo "0 1 2 3 4 5 6 7 8 9 A B C D E F" | cut -d' ' -f1-"$1"
}

# For all vector lengths and types, generate conversion functions
for vector_length in $VECTOR_LENGTHS; do
        if test $vector_length -eq 1; then
                # Scalar conversions are plain casts.
                for ftype in $TYPES; do
                        # Each TYPES entry is "name:size"; $2 is the name.
                        fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
                        for ttype in $TYPES; do
                                tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
                                if test $fbasetype = $tbasetype; then
                                        continue
                                fi
                                echo "INLINE OVERLOADABLE $tbasetype convert_$tbasetype($fbasetype v) {"
                                echo " return ($tbasetype)v;"
                                echo "}"
                                echo
                        done
                done
        else
                for ftype in $TYPES; do
                        fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
                        for ttype in $TYPES; do
                                tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
                                fvectortype=$fbasetype$vector_length
                                tvectortype=$tbasetype$vector_length
                                # Same base type: the conversion is the identity.
                                if test $fbasetype = $tbasetype; then
                                        echo "INLINE OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v) { return v; }"
                                        continue
                                fi
                                # Cast every component and rebuild the vector.
                                construct=
                                for lane in `vector_lanes $vector_length`; do
                                        construct="${construct:+$construct, }($tbasetype)(v.s$lane)"
                                done

                                echo "INLINE OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v) {"
                                echo " return ($tvectortype)($construct);"
                                echo "}"
                                echo
                        done
                done
        fi
done

# Scalar convert_DSTTYPE_sat functions. The first group is only declared here.
# The last group clamps negative values to zero when a signed type is widened
# to an unsigned one; returning x unchanged would wrap instead of saturating.
echo '
#define DEF(DSTTYPE, SRCTYPE) \
  OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);
DEF(char, uchar);
DEF(char, short);
DEF(char, ushort);
DEF(char, int);
DEF(char, uint);
DEF(char, float);
DEF(uchar, char);
DEF(uchar, short);
DEF(uchar, ushort);
DEF(uchar, int);
DEF(uchar, uint);
DEF(uchar, float);
DEF(short, ushort);
DEF(short, int);
DEF(short, uint);
DEF(short, float);
DEF(ushort, short);
DEF(ushort, int);
DEF(ushort, uint);
DEF(ushort, float);
DEF(int, uint);
DEF(int, float);
DEF(uint, int);
DEF(uint, float);
#undef DEF

#define DEF(DSTTYPE, SRCTYPE, MIN, MAX) \
  INLINE_OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
    return x > MAX ? (DSTTYPE)MAX : x < MIN ? (DSTTYPE)MIN : x; \
  }
DEF(char, long, -128, 127);
DEF(uchar, long, 0, 255);
DEF(short, long, -32768, 32767);
DEF(ushort, long, 0, 65535);
DEF(int, long, -0x7fffffff-1, 0x7fffffff);
DEF(uint, long, 0, 0xffffffffu);
DEF(long, float, -9.223372036854776e+18f, 9.223372036854776e+18f);
DEF(ulong, float, 0, 1.8446744073709552e+19f);
#undef DEF

#define DEF(DSTTYPE, SRCTYPE, MAX) \
  INLINE_OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
    return x > MAX ? (DSTTYPE)MAX : x; \
  }
DEF(char, ulong, 127);
DEF(uchar, ulong, 255);
DEF(short, ulong, 32767);
DEF(ushort, ulong, 65535);
DEF(int, ulong, 0x7fffffff);
DEF(uint, ulong, 0xffffffffu);
#undef DEF

INLINE_OVERLOADABLE long convert_long_sat(ulong x) {
  ulong MAX = 0x7ffffffffffffffful;
  return x > MAX ? MAX : x;
}

INLINE_OVERLOADABLE ulong convert_ulong_sat(long x) {
  return x < 0 ? 0 : x;
}

#define DEF(DSTTYPE, SRCTYPE) \
  INLINE_OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
    return x; \
  }
DEF(char, char);
DEF(uchar, uchar);
DEF(short, char);
DEF(short, uchar);
DEF(short, short);
DEF(ushort, uchar);
DEF(ushort, ushort);
DEF(int, char);
DEF(int, uchar);
DEF(int, short);
DEF(int, ushort);
DEF(int, int);
DEF(uint, uchar);
DEF(uint, ushort);
DEF(uint, uint);
DEF(long, char);
DEF(long, uchar);
DEF(long, short);
DEF(long, ushort);
DEF(long, int);
DEF(long, uint);
DEF(long, long);
DEF(ulong, uchar);
DEF(ulong, ushort);
DEF(ulong, uint);
DEF(ulong, ulong);
#undef DEF

#define DEF(DSTTYPE, SRCTYPE) \
  INLINE_OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
    return x < 0 ? (DSTTYPE)0 : (DSTTYPE)x; \
  }
DEF(ushort, char);
DEF(uint, char);
DEF(uint, short);
DEF(ulong, char);
DEF(ulong, short);
DEF(ulong, int);
#undef DEF
'

# vector convert_DSTTYPE_sat function
for vector_length in $VECTOR_LENGTHS; do
        if test $vector_length -eq 1; then continue; fi

        for ftype in $TYPES; do
                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
                # No saturated conversion is generated from double ...
                if test $fbasetype = "double"; then continue; fi

                for ttype in $TYPES; do
                        tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
                        # ... nor to double or float.
                        if test $tbasetype = "double" -o $tbasetype = "float"; then continue; fi

                        fvectortype=$fbasetype$vector_length
                        tvectortype=$tbasetype$vector_length
                        conv="convert_${tbasetype}_sat"

                        # Apply the scalar saturating conversion per component.
                        construct=
                        for lane in `vector_lanes $vector_length`; do
                                construct="${construct:+$construct, }$conv(v.s$lane)"
                        done

                        echo "INLINE OVERLOADABLE $tvectortype convert_${tvectortype}_sat($fvectortype v) {"
                        echo " return ($tvectortype)($construct);"
                        echo "}"
                        echo
                done
        done
done
#! /bin/false
# This is to be sourced by the generation scripts
# (gen_as.sh and gen_convert.sh read it with ". ./genconfig.sh").

# Supported base types and their lengths
# Each entry is "name:size"; the generators split it on ':' and use the size
# as a byte count when grouping types of equal size.
TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 double:8 float:4"

# Supported vector lengths
# A length of 1 stands for the scalar type.
VECTOR_LENGTHS="1 2 3 4 8 16"
## No user serviceable parts below here
* * Author: Benjamin Segovia */ /** * \file constant.hpp * * \author Benjamin Segovia */ #include "constant.hpp" namespace gbe { namespace ir { void ConstantSet::append(const char *data, const std::string &name, uint32_t size, uint32_t alignment) { const uint32_t offset = ALIGN(this->data.size(), alignment); const uint32_t padding = offset - this->data.size(); const Constant constant(name, size, alignment, offset); constants.push_back(constant); for (uint32_t i = 0; i < padding; ++i) this->data.push_back(0); for (uint32_t i = 0; i < size; ++i) this->data.push_back(data[i]); } #define OUT_UPDATE_SZ(elt) SERIALIZE_OUT(elt, outs, ret_size) #define IN_UPDATE_SZ(elt) DESERIALIZE_IN(elt, ins, total_size) size_t ConstantSet::serializeToBin(std::ostream& outs) { size_t ret_size = 0; OUT_UPDATE_SZ(magic_begin); /* output the const data. */ OUT_UPDATE_SZ((data.size()*sizeof(char))); if(data.size() > 0) { outs.write(data.data(), data.size()*sizeof(char)); ret_size += data.size()*sizeof(char); } OUT_UPDATE_SZ(constants.size()); for (auto const &cnst : constants) { size_t bytes = sizeof(cnst.getName().size()) //name length self + cnst.getName().size()*sizeof(char) //name + sizeof(cnst.getSize()) //size + sizeof(cnst.getAlignment()) //alignment + sizeof(cnst.getOffset()); //offset OUT_UPDATE_SZ(bytes); OUT_UPDATE_SZ(cnst.getName().size()); outs.write(cnst.getName().c_str(), cnst.getName().size()); ret_size += sizeof(char)*cnst.getName().size(); OUT_UPDATE_SZ(cnst.getSize()); OUT_UPDATE_SZ(cnst.getAlignment()); OUT_UPDATE_SZ(cnst.getOffset()); } OUT_UPDATE_SZ(magic_end); OUT_UPDATE_SZ(ret_size); return ret_size; } size_t ConstantSet::deserializeFromBin(std::istream& ins) { size_t total_size = 0; size_t global_data_sz = 0; size_t const_num; uint32_t magic; IN_UPDATE_SZ(magic); if (magic != magic_begin) return 0; IN_UPDATE_SZ(global_data_sz); for (size_t i = 0; i < global_data_sz; i++) { char elt; IN_UPDATE_SZ(elt); data.push_back(elt); } IN_UPDATE_SZ(const_num); for (size_t i = 0; 
i < const_num; i++) { size_t bytes; IN_UPDATE_SZ(bytes); size_t name_len; IN_UPDATE_SZ(name_len); char* c_name = new char[name_len+1]; ins.read(c_name, name_len); total_size += sizeof(char)*name_len; c_name[name_len] = 0; uint32_t size, align, offset; IN_UPDATE_SZ(size); IN_UPDATE_SZ(align); IN_UPDATE_SZ(offset); ir::Constant constant(c_name, size, align, offset); constants.push_back(constant); delete[] c_name; /* Saint check */ if (bytes != sizeof(name_len) + sizeof(char)*name_len + sizeof(size) + sizeof(align) + sizeof(offset)) return 0; } IN_UPDATE_SZ(magic); if (magic != magic_end) return 0; size_t total_bytes; IN_UPDATE_SZ(total_bytes); if (total_bytes + sizeof(total_size) != total_size) return 0; return total_size; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/constant.hpp000066400000000000000000000105521223142177000203400ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file constant.cpp * * \author Benjamin Segovia */ #ifndef __GBE_IR_CONSTANT_HPP__ #define __GBE_IR_CONSTANT_HPP__ #include "sys/vector.hpp" namespace gbe { namespace ir { /*! Describe one constant (may be a scalar or an array) */ class Constant { public: /*! 
Build a constant description */ INLINE Constant(const std::string &name, uint32_t size, uint32_t alignment, uint32_t offset) : name(name), size(size), alignment(alignment), offset(offset) {} /*! Copy constructor */ INLINE Constant(const Constant &other) : name(other.name), size(other.size), alignment(other.alignment), offset(other.offset) {} /*! Copy operator */ INLINE Constant& operator= (const Constant &other) { this->name = other.name; this->size = other.size; this->alignment = other.alignment; this->offset = other.offset; return *this; } /*! Nothing happens here */ INLINE ~Constant(void) {} const std::string& getName(void) const { return name; } uint32_t getSize (void) const { return size; } uint32_t getAlignment (void) const { return alignment; } uint32_t getOffset(void) const { return offset; } private: std::string name; //!< Optional name of the constant uint32_t size; //!< Size of the constant uint32_t alignment; //!< Alignment required for each constant uint32_t offset; //!< Offset of the constant in the data segment GBE_CLASS(Constant); }; /*! A constant set is a set of immutable data associated to a compilation * unit */ class ConstantSet : public Serializable { public: /*! Append a new constant in the constant set */ void append(const char*, const std::string&, uint32_t size, uint32_t alignment); /*! Number of constants */ size_t getConstantNum(void) const { return constants.size(); } /*! Get a special constant */ Constant& getConstant(size_t i) { return constants[i]; } /*! Get a special constant */ Constant& getConstant(const std::string & name) { for (auto & c : constants) { if (c.getName() == name) return c; } GBE_ASSERT(false); return *(Constant *)nullptr; } /*! Number of bytes of serialized constant data */ size_t getDataSize(void) const { return data.size(); } /*! 
Store serialized constant data into an array */ void getData(char *mem) const { for (size_t i = 0; i < data.size(); i ++) mem[i] = data[i]; } ConstantSet() {} ConstantSet(const ConstantSet& other) : Serializable(other), data(other.data), constants(other.constants) {} ConstantSet & operator = (const ConstantSet& other) { if (&other != this) { data = other.data; constants = other.constants; } return *this; } static const uint32_t magic_begin = TO_MAGIC('C', 'N', 'S', 'T'); static const uint32_t magic_end = TO_MAGIC('T', 'S', 'N', 'C'); /* format: magic_begin | const_data_size | const_data | constant_1_size | constant_1 | ........ | constant_n_size | constant_n | magic_end | total_size */ /*! Implements the serialization. */ virtual size_t serializeToBin(std::ostream& outs); virtual size_t deserializeFromBin(std::istream& ins); private: vector data; //!< The constant data serialized in one array vector constants;//!< Each constant description GBE_CLASS(ConstantSet); }; } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_CONSTANT_HPP__ */ Release_v0.3/backend/src/ir/context.cpp000066400000000000000000000133441223142177000201700ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file context.cpp * \author Benjamin Segovia */ #include "ir/context.hpp" #include "ir/unit.hpp" #include "ir/lowering.hpp" namespace gbe { namespace ir { Context::Context(Unit &unit) : unit(unit), fn(NULL), bb(NULL), usedLabels(NULL) {} Context::~Context(void) { for (const auto &elem : fnStack) GBE_SAFE_DELETE(elem.usedLabels); GBE_SAFE_DELETE(usedLabels); } Function &Context::getFunction(void) { GBE_ASSERTM(fn != NULL, "No function currently defined"); return *fn; } void Context::appendPushedConstant(Register reg, const PushLocation &pushed) { GBE_ASSERTM(fn != NULL, "No function currently defined"); GBE_ASSERTM(fn->pushMap.contains(reg) == false, "Register already pushed"); fn->pushMap.insert(std::make_pair(reg, pushed)); fn->locationMap.insert(std::make_pair(pushed, reg)); } void Context::startFunction(const std::string &name) { fnStack.push_back(StackElem(fn,bb,usedLabels)); fn = unit.newFunction(name); usedLabels = GBE_NEW_NO_ARG(vector); bb = NULL; } void Context::endFunction(void) { GBE_ASSERTM(fn != NULL, "No function to end"); GBE_ASSERT(fnStack.size() != 0); GBE_ASSERT(usedLabels != NULL); // Empty function -> append a return if (fn->blockNum() == 0) this->RET(); // Check first that all branch instructions point to valid labels GBE_ASSERT(usedLabels); #if GBE_DEBUG for (auto usage : *usedLabels) GBE_ASSERTM(usage != LABEL_IS_POINTED, "A label is used and not defined"); #endif /* GBE_DEBUG */ GBE_DELETE(usedLabels); // Remove all returns and insert one unique return block at the end of the // function lowerReturn(unit, fn->getName()); // Spill function argument to the stack if required and identify which // function arguments can use constant push lowerFunctionArguments(unit, fn->getName()); // Properly order labels and compute the CFG fn->sortLabels(); fn->computeCFG(); const StackElem elem = fnStack.back(); fnStack.pop_back(); fn = elem.fn; bb = elem.bb; usedLabels = elem.usedLabels; } Register 
Context::reg(RegisterFamily family) { GBE_ASSERTM(fn != NULL, "No function currently defined"); return fn->newRegister(family); } LabelIndex Context::label(void) { GBE_ASSERTM(fn != NULL, "No function currently defined"); const LabelIndex index = fn->newLabel(); if (index >= usedLabels->size()) { usedLabels->resize(index + 1); (*usedLabels)[index] = 0; } return index; } void Context::input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elementSize) { GBE_ASSERTM(fn != NULL, "No function currently defined"); GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register"); FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize, name); fn->args.push_back(arg); } void Context::output(Register reg) { GBE_ASSERTM(fn != NULL, "No function currently defined"); GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register"); fn->outputs.push_back(reg); } void Context::startBlock(void) { GBE_ASSERTM(fn != NULL, "No function currently defined"); this->bb = GBE_NEW(BasicBlock, *fn); fn->blocks.push_back(bb); } void Context::endBlock(void) { this->bb = NULL; } void Context::append(const Instruction &insn) { GBE_ASSERTM(fn != NULL, "No function currently defined"); // Start a new block if this is a label if (insn.isMemberOf() == true) { this->endBlock(); this->startBlock(); const LabelIndex index = cast(insn).getLabelIndex(); GBE_ASSERTM(index < fn->labelNum(), "Out-of-bound label"); GBE_ASSERTM(fn->labels[index] == NULL, "Label used in a previous block"); fn->labels[index] = bb; // Now the label index is properly defined GBE_ASSERT(index < usedLabels->size()); (*usedLabels)[index] |= LABEL_IS_DEFINED; } // We create a new label for a new block if the user did not do it else if (bb == NULL) { // this->startBlock(); const LabelIndex index = this->label(); const Instruction insn = ir::LABEL(index); this->append(insn); } // Append the instruction in the stream Instruction *insnPtr = fn->newInstruction(insn); bb->append(*insnPtr); #if 
GBE_DEBUG std::string whyNot; GBE_ASSERTM(insnPtr->wellFormed(whyNot), whyNot.c_str()); #endif /* GBE_DEBUG */ // Close the current block if this is a branch if (insn.isMemberOf() == true) { // We must book keep the fact that the label is used if (insn.getOpcode() == OP_BRA) { const BranchInstruction &branch = cast(insn); const LabelIndex index = branch.getLabelIndex(); GBE_ASSERT(index < usedLabels->size()); (*usedLabels)[index] |= LABEL_IS_POINTED; } this->endBlock(); } } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/context.hpp000066400000000000000000000202411223142177000201670ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file context.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_CONTEXT_HPP__ #define __GBE_IR_CONTEXT_HPP__ #include "ir/instruction.hpp" #include "ir/function.hpp" #include "ir/register.hpp" #include "ir/immediate.hpp" #include "ir/unit.hpp" #include "sys/vector.hpp" #include namespace gbe { namespace ir { /*! A context allows an easy creation of the functions (instruction stream and * the set of immediates and registers needed for it) and constant arrays */ class Context { public: /*! Create a new context for this unit */ Context(Unit &unit); /*! Free resources needed by context */ virtual ~Context(void); /*! 
Create a new function "name" */ void startFunction(const std::string &name); /*! Close the function */ void endFunction(void); /*! Get the current processed unit */ INLINE Unit &getUnit(void) { return unit; } /*! Get the current processed function */ Function &getFunction(void); /*! Set the SIMD width of the function */ void setSimdWidth(uint32_t width) const { GBE_ASSERT(width == 8 || width == 16); fn->simdWidth = width; } /*! Append a new pushed constant */ void appendPushedConstant(Register reg, const PushLocation &pushed); /*! Create a new register with the given family for the current function */ Register reg(RegisterFamily family); /*! Create a new immediate value */ template INLINE ImmediateIndex newImmediate(T value) { const Immediate imm(value); return fn->newImmediate(imm); } /*! Create an integer immediate value */ INLINE ImmediateIndex newIntegerImmediate(int64_t x, Type type) { switch (type) { case TYPE_S8: return this->newImmediate(int8_t(x)); case TYPE_U8: return this->newImmediate(uint8_t(x)); case TYPE_S16: return this->newImmediate(int16_t(x)); case TYPE_U16: return this->newImmediate(uint16_t(x)); case TYPE_S32: return this->newImmediate(int32_t(x)); case TYPE_U32: return this->newImmediate(uint32_t(x)); case TYPE_S64: return this->newImmediate(int64_t(x)); case TYPE_U64: return this->newImmediate(uint64_t(x)); default: NOT_SUPPORTED; return ImmediateIndex(0); } return ImmediateIndex(0); } /*! Set an immediate value */ template INLINE void setImmediate(ImmediateIndex index, T value) { const Immediate imm(value); fn->immediates[index] = imm; } /*! Create a new register holding the given value. A LOADI is pushed */ template INLINE Register immReg(T value) { GBE_ASSERTM(fn != NULL, "No function currently defined"); const Immediate imm(value); const ImmediateIndex index = fn->newImmediate(imm); const RegisterFamily family = getFamily(imm.type); const Register reg = this->reg(family); this->LOADI(imm.type, reg, index); return reg; } /*! 
Create a new label for the current function */ LabelIndex label(void); /*! Append a new input register for the function */ void input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elemSz = 0u); /*! Append a new output register for the function */ void output(Register reg); /*! Get the immediate value */ INLINE Immediate getImmediate(ImmediateIndex index) const { return fn->getImmediate(index); } /*! Append a new tuple */ template INLINE Tuple tuple(Args...args) { GBE_ASSERTM(fn != NULL, "No function currently defined"); return fn->file.appendTuple(args...); } /*! Make a tuple from an array of register */ INLINE Tuple arrayTuple(const Register *reg, uint32_t regNum) { GBE_ASSERTM(fn != NULL, "No function currently defined"); return fn->file.appendArrayTuple(reg, regNum); } /*! We just use variadic templates to forward instruction functions */ #define DECL_INSN(NAME, FAMILY) \ template INLINE void NAME(Args...args); #include "ir/instruction.hxx" #undef DECL_INSN /*! Return the pointer size handled by the unit */ INLINE PointerSize getPointerSize(void) const { return unit.getPointerSize(); } /*! Return the family of registers that contain pointer */ INLINE RegisterFamily getPointerFamily(void) const { return unit.getPointerFamily(); } #define DECL_THREE_SRC_INSN(NAME) \ INLINE void NAME(Type type, \ Register dst, \ Register src0, \ Register src1, \ Register src2) \ { \ const Tuple index = this->tuple(src0, src1, src2); \ this->NAME(type, dst, index); \ } DECL_THREE_SRC_INSN(SEL); DECL_THREE_SRC_INSN(I64MADSAT); #undef DECL_THREE_SRC_INSN /*! For all unary functions */ void ALU1(Opcode opcode, Type type, Register dst, Register src) { const Instruction insn = gbe::ir::ALU1(opcode, type, dst, src); this->append(insn); } /*! 
LOAD with the destinations directly specified */ template void LOAD(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values) { const Tuple index = this->tuple(values...); const uint16_t valueNum = std::tuple_size>::value; GBE_ASSERT(valueNum > 0); this->LOAD(type, index, offset, space, valueNum, dwAligned); } /*! STORE with the sources directly specified */ template void STORE(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values) { const Tuple index = this->tuple(values...); const uint16_t valueNum = std::tuple_size>::value; GBE_ASSERT(valueNum > 0); this->STORE(type, index, offset, space, valueNum, dwAligned); } protected: /*! A block must be started with a label */ void startBlock(void); /*! A block must be ended with a branch */ void endBlock(void); /*! Append the instruction in the current basic block */ void append(const Instruction &insn); Unit &unit; //!< A unit is associated to a contect Function *fn; //!< Current function we are processing BasicBlock *bb; //!< Current basic block we are filling static const uint8_t LABEL_IS_POINTED = 1 << 0; //!< Branch is using it static const uint8_t LABEL_IS_DEFINED = 1 << 1; //!< Label is defining it vector *usedLabels; /*! 
Functions can be defined recursiely */ struct StackElem { INLINE StackElem(Function *fn, BasicBlock *bb, vector *usedLabels) : fn(fn), bb(bb), usedLabels(usedLabels) {} Function *fn; //!< Function to process BasicBlock *bb; //!< Basic block currently processed vector *usedLabels; //!< Store all labels that are defined }; vector fnStack; //!< Stack of functions still to finish GBE_CLASS(Context); }; // Use argument checker to assert argument value correctness #define DECL_INSN(NAME, FAMILY) \ template \ INLINE void Context::NAME(Args...args) { \ GBE_ASSERTM(fn != NULL, "No function currently defined"); \ const Instruction insn = gbe::ir::NAME(args...); \ this->append(insn); \ } #include "ir/instruction.hxx" #undef DECL_INSN } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_CONTEXT_HPP__ */ Release_v0.3/backend/src/ir/function.cpp000066400000000000000000000253501223142177000203310ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file function.cpp * \author Benjamin Segovia */ #include "ir/function.hpp" #include "ir/unit.hpp" #include "sys/map.hpp" namespace gbe { namespace ir { /////////////////////////////////////////////////////////////////////////// // PushLocation /////////////////////////////////////////////////////////////////////////// Register PushLocation::getRegister(void) const { const Function::LocationMap &locationMap = fn.getLocationMap(); GBE_ASSERT(locationMap.contains(*this) == true); return locationMap.find(*this)->second; } /////////////////////////////////////////////////////////////////////////// // Function /////////////////////////////////////////////////////////////////////////// Function::Function(const std::string &name, const Unit &unit, Profile profile) : name(name), unit(unit), profile(profile), simdWidth(0), useSLM(false), slmSize(0) { initProfile(*this); samplerSet = GBE_NEW(SamplerSet); imageSet = GBE_NEW(ImageSet); } Function::~Function(void) { for (auto block : blocks) GBE_DELETE(block); for (auto arg : args) GBE_DELETE(arg); } RegisterFamily Function::getPointerFamily(void) const { return unit.getPointerFamily(); } void Function::sortLabels(void) { uint32_t last = 0; // Compute the new labels and patch the label instruction map labelMap; foreachInstruction([&](Instruction &insn) { if (insn.getOpcode() != OP_LABEL) return; // Create the new label const Instruction newLabel = LABEL(LabelIndex(last)); // Replace the previous label instruction LabelInstruction &label = cast(insn); const LabelIndex index = label.getLabelIndex(); labelMap.insert(std::make_pair(index, LabelIndex(last++))); newLabel.replace(&insn); }); // Patch all branch instructions with the new labels foreachInstruction([&](Instruction &insn) { if (insn.getOpcode() != OP_BRA) return; // Get the current branch instruction BranchInstruction &bra = cast(insn); const LabelIndex index = bra.getLabelIndex(); const LabelIndex newIndex = 
labelMap.find(index)->second; // Insert the patched branch instruction if (bra.isPredicated() == true) { const Instruction newBra = BRA(newIndex, bra.getPredicateIndex()); newBra.replace(&insn); } else { const Instruction newBra = BRA(newIndex); newBra.replace(&insn); } }); // Reset the label to block mapping this->labels.resize(last); foreachBlock([&](BasicBlock &bb) { const Instruction *first = bb.getFirstInstruction(); const LabelInstruction *label = cast(first); const LabelIndex index = label->getLabelIndex(); this->labels[index] = &bb; }); } LabelIndex Function::newLabel(void) { GBE_ASSERTM(labels.size() < 0xffff, "Too many labels are defined (65536 only are supported)"); const LabelIndex index(labels.size()); labels.push_back(NULL); return index; } void Function::outImmediate(std::ostream &out, ImmediateIndex index) const { GBE_ASSERT(index < immediates.size()); const Immediate imm = immediates[index]; switch (imm.type) { case TYPE_BOOL: out << !!imm.data.u8; break; case TYPE_S8: out << imm.data.s8; break; case TYPE_U8: out << imm.data.u8; break; case TYPE_S16: out << imm.data.s16; break; case TYPE_U16: out << imm.data.u16; break; case TYPE_S32: out << imm.data.s32; break; case TYPE_U32: out << imm.data.u32; break; case TYPE_S64: out << imm.data.s64; break; case TYPE_U64: out << imm.data.u64; break; case TYPE_HALF: out << "half(" << imm.data.u16 << ")"; break; case TYPE_FLOAT: out << imm.data.f32; break; case TYPE_DOUBLE: out << imm.data.f64; break; } } uint32_t Function::getLargestBlockSize(void) const { uint32_t insnNum = 0; foreachBlock([&insnNum](const ir::BasicBlock &bb) { insnNum = std::max(insnNum, uint32_t(bb.size())); }); return insnNum; } uint32_t Function::getFirstSpecialReg(void) const { return this->profile == PROFILE_OCL ? 0u : ~0u; } uint32_t Function::getSpecialRegNum(void) const { return this->profile == PROFILE_OCL ? 
ocl::regNum : ~0u; } bool Function::isEntryBlock(const BasicBlock &bb) const { if (this->blockNum() == 0) return false; else return &bb == this->blocks[0]; } const BasicBlock &Function::getTopBlock(void) const { GBE_ASSERT(blockNum() > 0 && blocks[0] != NULL); return *blocks[0]; } const BasicBlock &Function::getBottomBlock(void) const { const uint32_t n = blockNum(); GBE_ASSERT(n > 0 && blocks[n-1] != NULL); return *blocks[n-1]; } BasicBlock &Function::getBottomBlock(void) { const uint32_t n = blockNum(); GBE_ASSERT(n > 0 && blocks[n-1] != NULL); return *blocks[n-1]; } const BasicBlock &Function::getBlock(LabelIndex label) const { GBE_ASSERT(label < labelNum() && labels[label] != NULL); return *labels[label]; } const LabelInstruction *Function::getLabelInstruction(LabelIndex index) const { const BasicBlock *bb = this->labels[index]; const Instruction *first = bb->getFirstInstruction(); return cast(first); } /*! Indicate if the given register is a special one (like localID in OCL) */ bool Function::isSpecialReg(const Register ®) const { const uint32_t ID = uint32_t(reg); const uint32_t firstID = this->getFirstSpecialReg(); const uint32_t specialNum = this->getSpecialRegNum(); return ID >= firstID && ID < firstID + specialNum; } void Function::computeCFG(void) { // Clear possible previously computed CFG and compute the direct // predecessors and successors BasicBlock *prev = NULL; this->foreachBlock([this, &prev](BasicBlock &bb) { bb.successors.clear(); bb.predecessors.clear(); if (prev != NULL) { prev->nextBlock = &bb; bb.prevBlock = prev; } prev = &bb; }); // Update it. 
Do not forget that a branch can also jump to the next block BasicBlock *jumpToNext = NULL; this->foreachBlock([this, &jumpToNext](BasicBlock &bb) { if (jumpToNext) { jumpToNext->successors.insert(&bb); bb.predecessors.insert(jumpToNext); jumpToNext = NULL; } if (bb.size() == 0) return; Instruction *last = bb.getLastInstruction(); if (last->isMemberOf() == false) { jumpToNext = &bb; return; } const BranchInstruction &insn = cast(*last); if (insn.getOpcode() == OP_BRA) { const LabelIndex label = insn.getLabelIndex(); BasicBlock *target = this->blocks[label]; GBE_ASSERT(target != NULL); target->predecessors.insert(&bb); bb.successors.insert(target); if (insn.isPredicated() == true) jumpToNext = &bb; } }); } std::ostream &operator<< (std::ostream &out, const Function &fn) { out << ".decl_function " << fn.getName() << std::endl; out << fn.getRegisterFile(); out << "## " << fn.argNum() << " input register" << (fn.argNum() ? "s" : "") << " ##" << std::endl; for (uint32_t i = 0; i < fn.argNum(); ++i) { const FunctionArgument &input = fn.getArg(i); out << "decl_input."; switch (input.type) { case FunctionArgument::GLOBAL_POINTER: out << "global"; break; case FunctionArgument::LOCAL_POINTER: out << "local"; break; case FunctionArgument::CONSTANT_POINTER: out << "constant"; break; case FunctionArgument::VALUE: out << "value"; break; case FunctionArgument::STRUCTURE: out << "structure." << input.size; break; case FunctionArgument::IMAGE: out << "image"; break; default: break; } out << " %" << input.reg << " " << input.name << std::endl; } out << "## " << fn.outputNum() << " output register" << (fn.outputNum() ? 
"s" : "") << " ##" << std::endl; for (uint32_t i = 0; i < fn.outputNum(); ++i) out << "decl_output %" << fn.getOutput(i) << std::endl; out << "## " << fn.pushedNum() << " pushed register" << std::endl; const Function::PushMap &pushMap = fn.getPushMap(); for (const auto &pushed : pushMap) { out << "decl_pushed %" << pushed.first << " @{" << pushed.second.argID << "," << pushed.second.offset << "}" << std::endl; } out << "## " << fn.blockNum() << " block" << (fn.blockNum() ? "s" : "") << " ##" << std::endl; fn.foreachBlock([&](const BasicBlock &bb) { const_cast(bb).foreach([&out] (const Instruction &insn) { out << insn << std::endl; }); out << std::endl; }); out << ".end_function" << std::endl; return out; } /////////////////////////////////////////////////////////////////////////// // Basic Block /////////////////////////////////////////////////////////////////////////// BasicBlock::BasicBlock(Function &fn) : fn(fn) { this->nextBlock = this->prevBlock = NULL; } BasicBlock::~BasicBlock(void) { this->foreach([this] (Instruction &insn) { this->fn.deleteInstruction(&insn); }); } void BasicBlock::append(Instruction &insn) { insn.setParent(this); this->push_back(&insn); } Instruction *BasicBlock::getFirstInstruction(void) const { GBE_ASSERT(this->begin() != this->end()); const Instruction &insn = *this->begin(); return const_cast(&insn); } Instruction *BasicBlock::getLastInstruction(void) const { GBE_ASSERT(this->begin() != this->end()); const Instruction &insn = *(--this->end()); return const_cast(&insn); } LabelIndex BasicBlock::getLabelIndex(void) const { const Instruction *first = this->getFirstInstruction(); const LabelInstruction *label = cast(first); return label->getLabelIndex(); } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/function.hpp000066400000000000000000000337731223142177000203460ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under 
the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file function.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_FUNCTION_HPP__ #define __GBE_IR_FUNCTION_HPP__ #include "ir/immediate.hpp" #include "ir/register.hpp" #include "ir/instruction.hpp" #include "ir/profile.hpp" #include "ir/sampler.hpp" #include "ir/image.hpp" #include "sys/vector.hpp" #include "sys/set.hpp" #include "sys/map.hpp" #include "sys/alloc.hpp" #include namespace gbe { namespace ir { /*! Commonly used in the CFG */ typedef set BlockSet; class Unit; // Function belongs to a unit /*! Function basic blocks really belong to a function since: * 1 - registers used in the basic blocks belongs to the function register * file * 2 - branches point to basic blocks of the same function */ class BasicBlock : public NonCopyable, public intrusive_list { public: /*! Empty basic block */ BasicBlock(Function &fn); /*! Releases all the instructions */ ~BasicBlock(void); /*! Append a new instruction at the end of the stream */ void append(Instruction &insn); /*! Get the parent function */ Function &getParent(void) { return fn; } const Function &getParent(void) const { return fn; } /*! Get the next and previous allocated block */ BasicBlock *getNextBlock(void) const { return this->nextBlock; } BasicBlock *getPrevBlock(void) const { return this->prevBlock; } /*! 
Get / set the first and last instructions */ Instruction *getFirstInstruction(void) const; Instruction *getLastInstruction(void) const; /*! Get successors and predecessors */ const BlockSet &getSuccessorSet(void) const { return successors; } const BlockSet &getPredecessorSet(void) const { return predecessors; } /*! Get the label index of this block */ LabelIndex getLabelIndex(void) const; /*! Apply the given functor on all instructions */ template INLINE void foreach(const T &functor) { auto it = this->begin(); while (it != this->end()) { auto curr = it++; functor(*curr); } } private: friend class Function; //!< Owns the basic blocks BlockSet predecessors; //!< Incoming blocks BlockSet successors; //!< Outgoing blocks BasicBlock *nextBlock; //!< Block allocated just after this one BasicBlock *prevBlock; //!< Block allocated just before this one Function &fn; //!< Function the block belongs to GBE_CLASS(BasicBlock); }; /*! In fine, function input arguments can be pushed from the constant * buffer if they are structures. Other arguments can be images (textures) * and will also require special treatment. */ struct FunctionArgument { enum Type { GLOBAL_POINTER = 0, // __global CONSTANT_POINTER = 1, // __constant LOCAL_POINTER = 2, // __local VALUE = 3, // int, float STRUCTURE = 4, // struct foo IMAGE = 5, // image*d_t SAMPLER = 6 }; /*! Create a function input argument */ INLINE FunctionArgument(Type type, Register reg, uint32_t size, const std::string &name) : type(type), reg(reg), size(size), name(name) {} Type type; //!< Gives the type of argument we have Register reg; //!< Holds the argument uint32_t size; //!< == sizeof(void*) for ptr, sizeof(elem) for the rest const std::string name; //!< Holds the function name for IR output GBE_STRUCT(FunctionArgument); // Use custom allocator }; /*! 
Maps the pushed register to the function argument */ struct PushLocation { INLINE PushLocation(const Function &fn, uint32_t argID, uint32_t offset) : fn(fn), argID(argID), offset(offset) {} /*! Get the pushed virtual register */ Register getRegister(void) const; const Function &fn; //!< Function it belongs to uint32_t argID; //!< Function argument uint32_t offset; //!< Offset in the function argument GBE_STRUCT(PushLocation); // Use custom allocator }; /*! For maps and sets */ INLINE bool operator< (const PushLocation &arg0, const PushLocation &arg1) { if (arg0.argID != arg1.argID) return arg0.argID < arg1.argID; return arg0.offset < arg1.offset; } /*! A function is : * - a register file * - a set of basic block layout into a CGF * - input arguments */ class Function : public NonCopyable { public: /*! Map of all pushed registers */ typedef map PushMap; /*! Map of all pushed location (i.e. part of function argument) */ typedef map LocationMap; /*! Create an empty function */ Function(const std::string &name, const Unit &unit, Profile profile = PROFILE_OCL); /*! Release everything *including* the basic block pointers */ ~Function(void); /*! Get the function profile */ INLINE Profile getProfile(void) const { return profile; } /*! Get a new valid register */ INLINE Register newRegister(RegisterFamily family) { return this->file.append(family); } /*! Get the function name */ const std::string &getName(void) const { return name; } /*! When set, we do not have choice any more in the back end for it */ INLINE void setSimdWidth(uint32_t width) { simdWidth = width; } /*! Get the SIMD width (0 if not forced) */ uint32_t getSimdWidth(void) const { return simdWidth; } /*! Extract the register from the register file */ INLINE RegisterData getRegisterData(Register reg) const { return file.get(reg); } /*! Get the register family from the register itself */ INLINE RegisterFamily getRegisterFamily(Register reg) const { return this->getRegisterData(reg).family; } /*! 
Get the register from the tuple vector */ INLINE Register getRegister(Tuple ID, uint32_t which) const { return file.get(ID, which); } /*! Set the register from the tuple vector */ INLINE void setRegister(Tuple ID, uint32_t which, Register reg) { file.set(ID, which, reg); } /*! Get the register file */ INLINE const RegisterFile &getRegisterFile(void) const { return file; } /*! Get the given value ie immediate from the function */ INLINE Immediate getImmediate(ImmediateIndex ID) const { return immediates[ID]; } /*! Create a new immediate and returns its index */ INLINE ImmediateIndex newImmediate(const Immediate &imm) { const ImmediateIndex index(this->immediateNum()); this->immediates.push_back(imm); return index; } /*! Fast allocation / deallocation of instructions */ DECL_POOL(Instruction, insnPool); /*! Get input argument */ INLINE const FunctionArgument &getArg(uint32_t ID) const { GBE_ASSERT(args[ID] != NULL); return *args[ID]; } INLINE FunctionArgument &getArg(uint32_t ID) { GBE_ASSERT(args[ID] != NULL); return *args[ID]; } /*! Get arg ID. */ INLINE int32_t getArgID(FunctionArgument *requestArg) { for (uint32_t ID = 0; ID < args.size(); ID++) { if ( args[ID] == requestArg ) return ID; } GBE_ASSERTM(0, "Failed to get a valid argument ID."); return -1; } /*! Get the number of pushed registers */ INLINE uint32_t pushedNum(void) const { return pushMap.size(); } /*! Get the pushed data location for the given register */ INLINE const PushLocation *getPushLocation(Register reg) const { auto it = pushMap.find(reg); if (it == pushMap.end()) return NULL; else return &it->second; } /*! Get the map of pushed registers */ const PushMap &getPushMap(void) const { return this->pushMap; } /*! Get the map of pushed registers */ const LocationMap &getLocationMap(void) const { return this->locationMap; } /*! Get input argument from the register (linear research). 
Return NULL if * this is not an input argument */ INLINE const FunctionArgument *getArg(const Register ®) const { for (auto arg : args) if (arg->reg == reg) return arg; return NULL; } INLINE FunctionArgument *getArg(const Register ®) { for (auto arg : args) if (arg->reg == reg) return arg; return NULL; } /*! Get output register */ INLINE Register getOutput(uint32_t ID) const { return outputs[ID]; } /*! Get the argument location for the pushed register */ INLINE const PushLocation &getPushLocation(Register reg) { GBE_ASSERT(pushMap.contains(reg) == true); return pushMap.find(reg)->second; } /*! Says if this is the top basic block (entry point) */ bool isEntryBlock(const BasicBlock &bb) const; /*! Get function the entry point block */ const BasicBlock &getTopBlock(void) const; /*! Get the last block */ const BasicBlock &getBottomBlock(void) const; /*! Get the last block */ BasicBlock &getBottomBlock(void); /*! Get block from its label */ const BasicBlock &getBlock(LabelIndex label) const; /*! Get the label instruction from its label index */ const LabelInstruction *getLabelInstruction(LabelIndex index) const; /*! Return the number of instructions of the largest basic block */ uint32_t getLargestBlockSize(void) const; /*! Get the first index of the special registers and number of them */ uint32_t getFirstSpecialReg(void) const; uint32_t getSpecialRegNum(void) const; /*! Indicate if the given register is a special one (like localID in OCL) */ bool isSpecialReg(const Register ®) const; /*! Create a new label (still not bound to a basic block) */ LabelIndex newLabel(void); /*! Create the control flow graph */ void computeCFG(void); /*! Sort labels in increasing orders (top block has the smallest label) */ void sortLabels(void); /*! Get the pointer family */ RegisterFamily getPointerFamily(void) const; /*! Number of registers in the register file */ INLINE uint32_t regNum(void) const { return file.regNum(); } /*! 
Number of register tuples in the register file */ INLINE uint32_t tupleNum(void) const { return file.tupleNum(); } /*! Number of labels in the function */ INLINE uint32_t labelNum(void) const { return labels.size(); } /*! Number of immediate values in the function */ INLINE uint32_t immediateNum(void) const { return immediates.size(); } /*! Get the number of argument register */ INLINE uint32_t argNum(void) const { return args.size(); } /*! Get the number of output register */ INLINE uint32_t outputNum(void) const { return outputs.size(); } /*! Number of blocks in the function */ INLINE uint32_t blockNum(void) const { return blocks.size(); } /*! Output an immediate value in a stream */ void outImmediate(std::ostream &out, ImmediateIndex index) const; /*! Apply the given functor on all basic blocks */ template INLINE void foreachBlock(const T &functor) const { for (auto block : blocks) functor(*block); } /*! Apply the given functor on all instructions */ template INLINE void foreachInstruction(const T &functor) const { for (auto block : blocks) block->foreach(functor); } /*! Does it use SLM */ INLINE bool getUseSLM(void) const { return this->useSLM; } /*! Change the SLM config for the function */ INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; } /*! get SLM size needed for local variable inside kernel function */ INLINE uint32_t getSLMSize(void) const { return this->slmSize; } /*! set slm size needed for local variable inside kernel function */ INLINE void setSLMSize(uint32_t size) { this->slmSize = size; } /*! Get sampler set in this function */ SamplerSet* getSamplerSet(void) const {return samplerSet; } /*! 
Get image set in this function */ ImageSet* getImageSet(void) const {return imageSet; } private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name const Unit &unit; //!< Function belongs to this unit vector args; //!< Input registers of the function vector outputs; //!< Output registers of the function vector labels; //!< Each label points to a basic block vector immediates; //!< All immediate values in the function vector blocks; //!< All chained basic blocks RegisterFile file; //!< RegisterDatas used by the instructions Profile profile; //!< Current function profile PushMap pushMap; //!< Pushed function arguments (reg->loc) LocationMap locationMap; //!< Pushed function arguments (loc->reg) uint32_t simdWidth; //!< 8 or 16 if forced, 0 otherwise bool useSLM; //!< Is SLM required? uint32_t slmSize; //!< local variable size inside kernel function SamplerSet *samplerSet; //!< samplers used in this function. ImageSet* imageSet; //!< Image set in this function's arguments.. GBE_CLASS(Function); //!< Use custom allocator }; /*! Output the function string in the given stream */ std::ostream &operator<< (std::ostream &out, const Function &fn); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_FUNCTION_HPP__ */ Release_v0.3/backend/src/ir/image.cpp000066400000000000000000000205761223142177000175730ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ /** * \file image.cpp * */ #include "image.hpp" #include "context.hpp" #include "ocl_common_defines.h" #include "backend/program.h" namespace gbe { namespace ir { static uint32_t getInfoOffset4Type(struct ImageInfo *imageInfo, int type) { switch (type) { case GetImageInfoInstruction::WIDTH: return imageInfo->wSlot; case GetImageInfoInstruction::HEIGHT: return imageInfo->hSlot; case GetImageInfoInstruction::DEPTH: return imageInfo->depthSlot; case GetImageInfoInstruction::CHANNEL_DATA_TYPE: return imageInfo->dataTypeSlot; case GetImageInfoInstruction::CHANNEL_ORDER: return imageInfo->channelOrderSlot; default: NOT_IMPLEMENTED; } return 0; } static uint32_t setInfoOffset4Type(struct ImageInfo *imageInfo, int type, uint32_t offset) { switch (type) { case GetImageInfoInstruction::WIDTH: imageInfo->wSlot = offset; break; case GetImageInfoInstruction::HEIGHT: imageInfo->hSlot = offset; break; case GetImageInfoInstruction::DEPTH: imageInfo->depthSlot = offset; break; case GetImageInfoInstruction::CHANNEL_DATA_TYPE: imageInfo->dataTypeSlot = offset; break; case GetImageInfoInstruction::CHANNEL_ORDER: imageInfo->channelOrderSlot = offset; break; default: NOT_IMPLEMENTED; } return 0; } void ImageSet::appendInfo(ImageInfoKey key, uint32_t offset) { auto it = indexMap.find(key.index); assert(it != indexMap.end()); struct ImageInfo *imageInfo = it->second; setInfoOffset4Type(imageInfo, key.type, offset); } void ImageSet::append(Register imageReg, Context *ctx) { ir::FunctionArgument *arg = ctx->getFunction().getArg(imageReg); GBE_ASSERTM(arg && arg->type == ir::FunctionArgument::IMAGE, "Append an invalid reg to image set."); GBE_ASSERTM(regMap.find(imageReg) == regMap.end(), "Append the same image reg twice."); int32_t id = ctx->getFunction().getArgID(arg); struct ImageInfo *imageInfo = GBE_NEW(struct ImageInfo); imageInfo->arg_idx = id; imageInfo->idx = 
regMap.size() + gbe_get_image_base_index(); imageInfo->wSlot = -1; imageInfo->hSlot = -1; imageInfo->depthSlot = -1; imageInfo->dataTypeSlot = -1; imageInfo->channelOrderSlot = -1; imageInfo->dimOrderSlot = -1; regMap.insert(std::make_pair(imageReg, imageInfo)); indexMap.insert(std::make_pair(imageInfo->idx, imageInfo)); } const int32_t ImageSet::getInfoOffset(ImageInfoKey key) const { auto it = indexMap.find(key.index); if (it == indexMap.end()) return -1; struct ImageInfo *imageInfo = it->second; return getInfoOffset4Type(imageInfo, key.type); } const uint32_t ImageSet::getIdx(const Register imageReg) const { auto it = regMap.find(imageReg); GBE_ASSERT(it != regMap.end()); return it->second->idx; } void ImageSet::getData(struct ImageInfo *imageInfos) const { for(auto &it : regMap) imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second; } ImageSet::~ImageSet() { for(auto &it : regMap) GBE_DELETE(it.second); } #define OUT_UPDATE_SZ(elt) SERIALIZE_OUT(elt, outs, ret_size) #define IN_UPDATE_SZ(elt) DESERIALIZE_IN(elt, ins, total_size) /*! Implements the serialization. 
*/ size_t ImageSet::serializeToBin(std::ostream& outs) { size_t ret_size = 0; OUT_UPDATE_SZ(magic_begin); OUT_UPDATE_SZ(regMap.size()); for (auto iter : regMap) { OUT_UPDATE_SZ(iter.first); OUT_UPDATE_SZ(iter.second->arg_idx); OUT_UPDATE_SZ(iter.second->idx); OUT_UPDATE_SZ(iter.second->wSlot); OUT_UPDATE_SZ(iter.second->hSlot); OUT_UPDATE_SZ(iter.second->depthSlot); OUT_UPDATE_SZ(iter.second->dataTypeSlot); OUT_UPDATE_SZ(iter.second->channelOrderSlot); OUT_UPDATE_SZ(iter.second->dimOrderSlot); } OUT_UPDATE_SZ(indexMap.size()); for (auto iter : indexMap) { OUT_UPDATE_SZ(iter.first); OUT_UPDATE_SZ(iter.second->arg_idx); OUT_UPDATE_SZ(iter.second->idx); OUT_UPDATE_SZ(iter.second->wSlot); OUT_UPDATE_SZ(iter.second->hSlot); OUT_UPDATE_SZ(iter.second->depthSlot); OUT_UPDATE_SZ(iter.second->dataTypeSlot); OUT_UPDATE_SZ(iter.second->channelOrderSlot); OUT_UPDATE_SZ(iter.second->dimOrderSlot); } OUT_UPDATE_SZ(magic_end); OUT_UPDATE_SZ(ret_size); return ret_size; } size_t ImageSet::deserializeFromBin(std::istream& ins) { size_t total_size = 0; uint32_t magic; size_t image_map_sz = 0; IN_UPDATE_SZ(magic); if (magic != magic_begin) return 0; IN_UPDATE_SZ(image_map_sz); //regMap for (size_t i = 0; i < image_map_sz; i++) { ir::Register reg; ImageInfo *img_info = GBE_NEW(struct ImageInfo);; IN_UPDATE_SZ(reg); IN_UPDATE_SZ(img_info->arg_idx); IN_UPDATE_SZ(img_info->idx); IN_UPDATE_SZ(img_info->wSlot); IN_UPDATE_SZ(img_info->hSlot); IN_UPDATE_SZ(img_info->depthSlot); IN_UPDATE_SZ(img_info->dataTypeSlot); IN_UPDATE_SZ(img_info->channelOrderSlot); IN_UPDATE_SZ(img_info->dimOrderSlot); regMap.insert(std::make_pair(reg, img_info)); } IN_UPDATE_SZ(image_map_sz); //indexMap for (uint32_t i = 0; i < image_map_sz; i++) { uint32_t index; ImageInfo *img_info = GBE_NEW(struct ImageInfo);; IN_UPDATE_SZ(index); IN_UPDATE_SZ(img_info->arg_idx); IN_UPDATE_SZ(img_info->idx); IN_UPDATE_SZ(img_info->wSlot); IN_UPDATE_SZ(img_info->hSlot); IN_UPDATE_SZ(img_info->depthSlot); 
IN_UPDATE_SZ(img_info->dataTypeSlot); IN_UPDATE_SZ(img_info->channelOrderSlot); IN_UPDATE_SZ(img_info->dimOrderSlot); indexMap.insert(std::make_pair(index, img_info)); } IN_UPDATE_SZ(magic); if (magic != magic_end) return 0; size_t total_bytes; IN_UPDATE_SZ(total_bytes); if (total_bytes + sizeof(total_size) != total_size) return 0; return total_size; } void ImageSet::printStatus(int indent, std::ostream& outs) { using namespace std; string spaces = indent_to_str(indent); string spaces_nl = indent_to_str(indent + 4); outs << spaces << "------------ Begin ImageSet ------------" << "\n"; outs << spaces_nl << " ImageSet Map: [reg, arg_idx, idx, wSlot, hSlot, depthSlot, " "dataTypeSlot, channelOrderSlot, dimOrderSlot]\n"; outs << spaces_nl << " regMap size: " << regMap.size() << "\n"; for (auto iter : regMap) { outs << spaces_nl << " [" << iter.first << ", " << iter.second->arg_idx << ", " << iter.second->idx << ", " << iter.second->wSlot << ", " << iter.second->hSlot << ", " << iter.second->depthSlot << ", " << iter.second->dataTypeSlot << ", " << iter.second->channelOrderSlot << ", " << iter.second->dimOrderSlot << "]" << "\n"; } outs << spaces_nl << " ImageSet Map: [index, arg_idx, idx, wSlot, hSlot, depthSlot, " "dataTypeSlot, channelOrderSlot, dimOrderSlot]\n"; outs << spaces_nl << " regMap size: " << indexMap.size() << "\n"; for (auto iter : indexMap) { outs << spaces_nl << " [" << iter.first << ", " << iter.second->arg_idx << ", " << iter.second->idx << ", " << iter.second->wSlot << ", " << iter.second->hSlot << ", " << iter.second->depthSlot << ", " << iter.second->dataTypeSlot << ", " << iter.second->channelOrderSlot << ", " << iter.second->dimOrderSlot << ", " << "\n"; } outs << spaces << "------------- End ImageSet -------------" << "\n"; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/image.hpp000066400000000000000000000056021223142177000175710ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is 
free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ /** * \file image.hpp * */ #ifndef __GBE_IR_IMAGE_HPP__ #define __GBE_IR_IMAGE_HPP__ #include "ir/register.hpp" #include "ir/instruction.hpp" // for ImageInfoKey #include "sys/map.hpp" extern "C" { struct ImageInfo; } namespace gbe { namespace ir { class Context; /*! An image set is a set of images which are defined in kernel args. * We use this set to gather the images here and allocate a unique index * for each individual image. And that individual image could be used * at backend to identify this image's location. */ class ImageSet : public Serializable { public: /*! Append an image argument. */ void append(Register imageReg, Context *ctx); /*! Append an image info slot. */ void appendInfo(ImageInfoKey key, uint32_t offset); /*! Get the image's index(actual location). 
*/ const uint32_t getIdx(const Register imageReg) const; size_t getDataSize(void) { return regMap.size(); } size_t getDataSize(void) const { return regMap.size(); } const int32_t getInfoOffset(ImageInfoKey key) const; void getData(struct ImageInfo *imageInfos) const; void operator = (const ImageSet& other) { regMap.insert(other.regMap.begin(), other.regMap.end()); } ImageSet(const ImageSet& other) : regMap(other.regMap.begin(), other.regMap.end()) { } ImageSet() {} ~ImageSet(); static const uint32_t magic_begin = TO_MAGIC('I', 'M', 'A', 'G'); static const uint32_t magic_end = TO_MAGIC('G', 'A', 'M', 'I'); /* format: magic_begin | regMap_size | element_1 | ........ | element_n | indexMap_size | element_1 | ........ | element_n | magic_end | total_size */ /*! Implements the serialization. */ virtual size_t serializeToBin(std::ostream& outs); virtual size_t deserializeFromBin(std::istream& ins); virtual void printStatus(int indent, std::ostream& outs); private: map regMap; map indexMap; GBE_CLASS(ImageSet); }; } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_IMAGE_HPP__ */ Release_v0.3/backend/src/ir/immediate.hpp000066400000000000000000000051371223142177000204500ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file value.hpp * * \author Benjamin Segovia */ #ifndef __GBE_IR_IMMEDIATE_HPP__ #define __GBE_IR_IMMEDIATE_HPP__ #include "ir/type.hpp" #include "sys/platform.hpp" namespace gbe { namespace ir { /*! The value as stored in the instruction */ class Immediate { public: INLINE Immediate(void) {} #define DECL_CONSTRUCTOR(TYPE, FIELD, IR_TYPE) \ Immediate(TYPE FIELD) { \ this->type = IR_TYPE; \ this->data.u64 = 0llu; \ this->data.FIELD = FIELD; \ } DECL_CONSTRUCTOR(bool, b, TYPE_BOOL) DECL_CONSTRUCTOR(int8_t, s8, TYPE_S8) DECL_CONSTRUCTOR(uint8_t, u8, TYPE_U8) DECL_CONSTRUCTOR(int16_t, s16, TYPE_S16) DECL_CONSTRUCTOR(uint16_t, u16, TYPE_S16) DECL_CONSTRUCTOR(int32_t, s32, TYPE_S32) DECL_CONSTRUCTOR(uint32_t, u32, TYPE_S32) DECL_CONSTRUCTOR(int64_t, s64, TYPE_S64) DECL_CONSTRUCTOR(uint64_t, u64, TYPE_S64) DECL_CONSTRUCTOR(float, f32, TYPE_FLOAT) DECL_CONSTRUCTOR(double, f64, TYPE_DOUBLE) #undef DECL_CONSTRUCTOR union { bool b; int8_t s8; uint8_t u8; int16_t s16; uint16_t u16; int32_t s32; uint32_t u32; int64_t s64; uint64_t u64; float f32; double f64; } data; //!< Value to store Type type; //!< Type of the value GBE_CLASS(Immediate); }; /*! Compare two immediates */ INLINE bool operator< (const Immediate &imm0, const Immediate &imm1) { if (imm0.type != imm1.type) return uint32_t(imm0.type) < uint32_t(imm1.type); return imm0.data.u64 < imm1.data.u64; } /*! A value is stored in a per-function vector. 
This is the index to it */ TYPE_SAFE(ImmediateIndex, uint16_t) } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_IMMEDIATE_HPP__ */ Release_v0.3/backend/src/ir/instruction.cpp000066400000000000000000001556341223142177000210760ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file instruction.cpp * \author Benjamin Segovia */ #include "ir/instruction.hpp" #include "ir/function.hpp" namespace gbe { namespace ir { /////////////////////////////////////////////////////////////////////////// // Implements the concrete implementations of the instruction classes. We // cast an instruction to an internal class to run the given member function /////////////////////////////////////////////////////////////////////////// namespace internal { #define ALIGNED_INSTRUCTION ALIGNED(ALIGNOF(Instruction)) /*! Policy shared by all the internal instructions */ struct BasePolicy { /*! Create an instruction from its internal representation */ Instruction convert(void) const { return Instruction(reinterpret_cast(&this->opcode)); } /*! Output the opcode in the given stream */ INLINE void outOpcode(std::ostream &out) const { switch (opcode) { #define DECL_INSN(OPCODE, CLASS) case OP_##OPCODE: out << #OPCODE; break; #include "instruction.hxx" #undef DECL_INSN case OP_INVALID: NOT_SUPPORTED; break; }; } /*! 
Instruction opcode */ Opcode opcode; }; /*! For regular n source instructions */ template struct NSrcPolicy { INLINE uint32_t getSrcNum(void) const { return srcNum; } INLINE Register getSrc(const Function &fn, uint32_t ID) const { GBE_ASSERTM((int) ID < (int) srcNum, "Out-of-bound source"); return static_cast(this)->src[ID]; } INLINE void setSrc(Function &fn, uint32_t ID, Register reg) { GBE_ASSERTM((int) ID < (int) srcNum, "Out-of-bound source"); static_cast(this)->src[ID] = reg; } }; /*! For regular n destinations instructions */ template struct NDstPolicy { INLINE uint32_t getDstNum(void) const { return dstNum; } INLINE Register getDst(const Function &fn, uint32_t ID) const { GBE_ASSERTM((int) ID < (int) dstNum, "Out-of-bound destination"); return static_cast(this)->dst[ID]; } INLINE void setDst(Function &fn, uint32_t ID, Register reg) { GBE_ASSERTM((int) ID < (int) dstNum, "Out-of-bound destination"); static_cast(this)->dst[ID] = reg; } }; /*! For instructions that use a tuple for source */ template struct TupleSrcPolicy { INLINE uint32_t getSrcNum(void) const { return static_cast(this)->srcNum; } INLINE Register getSrc(const Function &fn, uint32_t ID) const { GBE_ASSERTM(ID < static_cast(this)->srcNum, "Out-of-bound source register"); return fn.getRegister(static_cast(this)->src, ID); } INLINE void setSrc(Function &fn, uint32_t ID, Register reg) { GBE_ASSERTM(ID < static_cast(this)->srcNum, "Out-of-bound source register"); return fn.setRegister(static_cast(this)->src, ID, reg); } }; /*! 
For instructions that use a tuple for destination */ template struct TupleDstPolicy { INLINE uint32_t getDstNum(void) const { return static_cast(this)->dstNum; } INLINE Register getDst(const Function &fn, uint32_t ID) const { GBE_ASSERTM(ID < static_cast(this)->dstNum, "Out-of-bound source register"); return fn.getRegister(static_cast(this)->dst, ID); } INLINE void setDst(Function &fn, uint32_t ID, Register reg) { GBE_ASSERTM(ID < static_cast(this)->dstNum, "Out-of-bound source register"); return fn.setRegister(static_cast(this)->dst, ID, reg); } }; /*! All unary and binary arithmetic instructions */ template // 1 or 2 class ALIGNED_INSTRUCTION NaryInstruction : public BasePolicy, public NSrcPolicy, srcNum>, public NDstPolicy, 1> { public: INLINE Type getType(void) const { return this->type; } INLINE bool wellFormed(const Function &fn, std::string &whyNot) const; INLINE void out(std::ostream &out, const Function &fn) const; Type type; //!< Type of the instruction Register dst[1]; //!< Index of the register in the register file Register src[srcNum]; //!< Indices of the sources }; /*! All 1-source arithmetic instructions */ class ALIGNED_INSTRUCTION UnaryInstruction : public NaryInstruction<1> { public: UnaryInstruction(Opcode opcode, Type type, Register dst, Register src) { this->opcode = opcode; this->type = type; this->dst[0] = dst; this->src[0] = src; } }; /*! 
All 2-source arithmetic instructions */ class ALIGNED_INSTRUCTION BinaryInstruction : public NaryInstruction<2> { public: BinaryInstruction(Opcode opcode, Type type, Register dst, Register src0, Register src1) { this->opcode = opcode; this->type = type; this->dst[0] = dst; this->src[0] = src0; this->src[1] = src1; } INLINE bool commutes(void) const { switch (opcode) { case OP_ADD: case OP_ADDSAT: case OP_XOR: case OP_OR: case OP_AND: case OP_MUL: return true; default: return false; } } }; class ALIGNED_INSTRUCTION TernaryInstruction : public BasePolicy, public NDstPolicy, public TupleSrcPolicy { public: TernaryInstruction(Opcode opcode, Type type, Register dst, Tuple src) { this->opcode = opcode; this->type = type; this->dst[0] = dst; this->src = src; } Type getType(void) const { return type; } bool wellFormed(const Function &fn, std::string &whyNot) const; INLINE void out(std::ostream &out, const Function &fn) const; Type type; Register dst[1]; Tuple src; static const uint32_t srcNum = 3; }; /*! Three sources mean we need a tuple to encode it */ class ALIGNED_INSTRUCTION SelectInstruction : public BasePolicy, public NDstPolicy, public TupleSrcPolicy { public: SelectInstruction(Type type, Register dst, Tuple src) { this->opcode = OP_SEL; this->type = type; this->dst[0] = dst; this->src = src; } INLINE Type getType(void) const { return this->type; } INLINE bool wellFormed(const Function &fn, std::string &whyNot) const; INLINE void out(std::ostream &out, const Function &fn) const; Type type; //!< Type of the instruction Register dst[1]; //!< Dst is the register index Tuple src; //!< 3 sources do not fit in 8 bytes -> use a tuple static const uint32_t srcNum = 3; }; /*! Comparison instructions take two sources of the same type and return a * boolean value. 
Since it is pretty similar to binary instruction, we * steal all the methods from it, except wellFormed (dst register is always * a boolean value) */ class ALIGNED_INSTRUCTION CompareInstruction : public NaryInstruction<2> { public: CompareInstruction(Opcode opcode, Type type, Register dst, Register src0, Register src1) { this->opcode = opcode; this->type = type; this->dst[0] = dst; this->src[0] = src0; this->src[1] = src1; } INLINE bool wellFormed(const Function &fn, std::string &whyNot) const; }; class ALIGNED_INSTRUCTION ConvertInstruction : public BasePolicy, public NDstPolicy, public NSrcPolicy { public: ConvertInstruction(Type dstType, Type srcType, Register dst, Register src, bool saturated=false) { this->opcode = saturated ? OP_SAT_CVT : OP_CVT; this->dst[0] = dst; this->src[0] = src; this->dstType = dstType; this->srcType = srcType; } INLINE Type getSrcType(void) const { return this->srcType; } INLINE Type getDstType(void) const { return this->dstType; } INLINE bool wellFormed(const Function &fn, std::string &whyNot) const; INLINE void out(std::ostream &out, const Function &fn) const; Register dst[1]; Register src[1]; Type dstType; //!< Type to convert to Type srcType; //!< Type to convert from }; class ALIGNED_INSTRUCTION AtomicInstruction : public BasePolicy, public TupleSrcPolicy, public NDstPolicy { public: AtomicInstruction(AtomicOps atomicOp, Register dst, AddressSpace addrSpace, Tuple src) { this->opcode = OP_ATOMIC; this->atomicOp = atomicOp; this->dst[0] = dst; this->src = src; this->addrSpace = addrSpace; srcNum = 2; if((atomicOp == ATOMIC_OP_INC) || (atomicOp == ATOMIC_OP_DEC)) srcNum = 1; if(atomicOp == ATOMIC_OP_CMPXCHG) srcNum = 3; } INLINE AddressSpace getAddressSpace(void) const { return this->addrSpace; } INLINE AtomicOps getAtomicOpcode(void) const { return this->atomicOp; } INLINE bool wellFormed(const Function &fn, std::string &whyNot) const; INLINE void out(std::ostream &out, const Function &fn) const; Register dst[1]; Tuple src; 
AddressSpace addrSpace; //!< Address space uint8_t srcNum:2; //! { public: INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register predicate) { GBE_ASSERT(op == OP_BRA); this->opcode = op; this->predicate = predicate; this->labelIndex = labelIndex; this->hasPredicate = true; this->hasLabel = true; } INLINE BranchInstruction(Opcode op, LabelIndex labelIndex) { GBE_ASSERT(op == OP_BRA); this->opcode = OP_BRA; this->labelIndex = labelIndex; this->hasPredicate = false; this->hasLabel = true; } INLINE BranchInstruction(Opcode op) { GBE_ASSERT(op == OP_RET); this->opcode = OP_RET; this->hasPredicate = false; this->hasLabel = false; } INLINE LabelIndex getLabelIndex(void) const { GBE_ASSERTM(hasLabel, "No target label for this branch instruction"); return labelIndex; } INLINE uint32_t getSrcNum(void) const { return hasPredicate ? 1 : 0; } INLINE Register getSrc(const Function &fn, uint32_t ID) const { GBE_ASSERTM(hasPredicate, "No source for unpredicated branches"); GBE_ASSERTM(ID == 0, "Only one source for the branch instruction"); return predicate; } INLINE void setSrc(Function &fn, uint32_t ID, Register reg) { GBE_ASSERTM(hasPredicate, "No source for unpredicated branches"); GBE_ASSERTM(ID == 0, "Only one source for the branch instruction"); predicate = reg; } INLINE bool isPredicated(void) const { return hasPredicate; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; Register predicate; //!< Predication means conditional branch LabelIndex labelIndex; //!< Index of the label the branch targets bool hasPredicate:1; //!< Is it predicated? bool hasLabel:1; //!< Is there any target label? 
Register dst[0]; //!< No destination }; class ALIGNED_INSTRUCTION LoadInstruction : public BasePolicy, public NSrcPolicy { public: LoadInstruction(Type type, Tuple dstValues, Register offset, AddressSpace addrSpace, uint32_t valueNum, bool dwAligned) { GBE_ASSERT(valueNum < 128); this->opcode = OP_LOAD; this->type = type; this->offset = offset; this->values = dstValues; this->addrSpace = addrSpace; this->valueNum = valueNum; this->dwAligned = dwAligned ? 1 : 0; } INLINE Register getDst(const Function &fn, uint32_t ID) const { GBE_ASSERTM(ID < valueNum, "Out-of-bound source register"); return fn.getRegister(values, ID); } INLINE void setDst(Function &fn, uint32_t ID, Register reg) { GBE_ASSERTM(ID < valueNum, "Out-of-bound source register"); fn.setRegister(values, ID, reg); } INLINE uint32_t getDstNum(void) const { return valueNum; } INLINE Type getValueType(void) const { return type; } INLINE uint32_t getValueNum(void) const { return valueNum; } INLINE AddressSpace getAddressSpace(void) const { return addrSpace; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; INLINE bool isAligned(void) const { return !!dwAligned; } Type type; //!< Type to store Register src[0]; //!< Address where to load from Register offset; //!< Alias to make it similar to store Tuple values; //!< Values to load AddressSpace addrSpace; //!< Where to load uint8_t valueNum:7; //!< Number of values to load uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN }; class ALIGNED_INSTRUCTION StoreInstruction : public BasePolicy, public NDstPolicy { public: StoreInstruction(Type type, Tuple values, Register offset, AddressSpace addrSpace, uint32_t valueNum, bool dwAligned) { GBE_ASSERT(valueNum < 255); this->opcode = OP_STORE; this->type = type; this->offset = offset; this->values = values; this->addrSpace = addrSpace; this->valueNum = valueNum; this->dwAligned = dwAligned ? 
1 : 0; } INLINE Register getSrc(const Function &fn, uint32_t ID) const { GBE_ASSERTM(ID < valueNum + 1u, "Out-of-bound source register for store"); if (ID == 0u) return offset; else return fn.getRegister(values, ID - 1); } INLINE void setSrc(Function &fn, uint32_t ID, Register reg) { GBE_ASSERTM(ID < valueNum + 1u, "Out-of-bound source register for store"); if (ID == 0u) offset = reg; else fn.setRegister(values, ID - 1, reg); } INLINE uint32_t getSrcNum(void) const { return valueNum + 1u; } INLINE uint32_t getValueNum(void) const { return valueNum; } INLINE Type getValueType(void) const { return type; } INLINE AddressSpace getAddressSpace(void) const { return addrSpace; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; INLINE bool isAligned(void) const { return !!dwAligned; } Type type; //!< Type to store Register offset; //!< First source is the offset where to store Tuple values; //!< Values to store AddressSpace addrSpace; //!< Where to store uint8_t valueNum:7; //!< Number of values to store uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN Register dst[0]; //!< No destination }; class ALIGNED_INSTRUCTION SampleInstruction : // TODO public BasePolicy, public TupleSrcPolicy, public TupleDstPolicy { public: SampleInstruction(Tuple dstTuple, Tuple srcTuple, Type dstType, Type srcType) { this->opcode = OP_SAMPLE; this->dst = dstTuple; this->src = srcTuple; this->dstType = dstType; this->srcType = srcType; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << this->getDstType() << "." 
<< this->getSrcType() << " surface id %" << this->getSrc(fn, 0) << " sampler %" << this->getSrc(fn, 1) << " coord u %" << this->getSrc(fn, 2) << " coord v %" << this->getSrc(fn, 3) << " coord w %" << this->getSrc(fn, 4) << " %" << this->getDst(fn, 0) << " %" << this->getDst(fn, 1) << " %" << this->getDst(fn, 2) << " %" << this->getDst(fn, 3); } Tuple src; Tuple dst; Type srcType; Type dstType; INLINE Type getSrcType(void) const { return this->srcType; } INLINE Type getDstType(void) const { return this->dstType; } static const uint32_t srcNum = 6; static const uint32_t dstNum = 4; }; class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO public BasePolicy, public TupleSrcPolicy, public NDstPolicy { public: INLINE TypedWriteInstruction(Tuple srcTuple, Type srcType, Type coordType) { this->opcode = OP_TYPED_WRITE; this->src = srcTuple; this->coordType = coordType; this->srcType = srcType; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." 
<< this->getSrcType() << " surface id %" << this->getSrc(fn, 0) << " coord u %" << this->getSrc(fn, 1) << " coord v %" << this->getSrc(fn, 2) << " coord w %" << this->getSrc(fn, 3) << " %" << this->getSrc(fn, 4) << " %" << this->getSrc(fn, 5) << " %" << this->getSrc(fn, 6) << " %" << this->getSrc(fn, 7); } Tuple src; Type srcType; Type coordType; INLINE Type getSrcType(void) const { return this->srcType; } INLINE Type getCoordType(void) const { return this->coordType; } // bti, u, v, w, 4 data elements static const uint32_t srcNum = 8; Register dst[0]; //!< No dest register }; class ALIGNED_INSTRUCTION GetSamplerInfoInstruction : public BasePolicy, public NSrcPolicy, public NDstPolicy { public: GetSamplerInfoInstruction( Register dst, Register src) { this->opcode = OP_GET_SAMPLER_INFO; this->dst[0] = dst; this->src[0] = src; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << " sampler id %" << this->getSrc(fn, 0) << " %" << this->getDst(fn, 0); } Register src[1]; //!< Surface to get info Register dst[1]; //!< return value static const uint32_t dstNum = 1; }; class ALIGNED_INSTRUCTION GetImageInfoInstruction : public BasePolicy, public NSrcPolicy, public NDstPolicy { public: GetImageInfoInstruction( int type, Register dst, Register src, Register infoReg) { this->opcode = OP_GET_IMAGE_INFO; this->infoType = type; this->dst[0] = dst; this->src[0] = src; this->src[1] = infoReg; } INLINE uint32_t getInfoType(void) const { return infoType; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << this->getInfoType() << " surface id %" << this->getSrc(fn, 0) << " %" << this->getDst(fn, 0); } uint8_t infoType; //!< Type of the requested information. Register src[2]; //!< Surface to get info Register dst[1]; //!< dest register to put the information. 
static const uint32_t dstNum = 1; }; class ALIGNED_INSTRUCTION LoadImmInstruction : public BasePolicy, public NSrcPolicy, public NDstPolicy { public: INLINE LoadImmInstruction(Type type, Register dst, ImmediateIndex index) { this->dst[0] = dst; this->opcode = OP_LOADI; this->immediateIndex = index; this->type = type; } INLINE Immediate getImmediate(const Function &fn) const { return fn.getImmediate(immediateIndex); } INLINE Type getType(void) const { return this->type; } bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; Register dst[1]; //!< RegisterData to store into Register src[0]; //!< No source register ImmediateIndex immediateIndex; //!< Index in the vector of immediates Type type; //!< Type of the immediate }; class ALIGNED_INSTRUCTION SyncInstruction : public BasePolicy, public NSrcPolicy, public NDstPolicy { public: INLINE SyncInstruction(uint32_t parameters) { this->opcode = OP_SYNC; this->parameters = parameters; } INLINE uint32_t getParameters(void) const { return this->parameters; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; uint32_t parameters; Register dst[0], src[0]; }; class ALIGNED_INSTRUCTION LabelInstruction : public BasePolicy, public NSrcPolicy, public NDstPolicy { public: INLINE LabelInstruction(LabelIndex labelIndex) { this->opcode = OP_LABEL; this->labelIndex = labelIndex; } INLINE LabelIndex getLabelIndex(void) const { return labelIndex; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; LabelIndex labelIndex; //!< Index of the label Register dst[0], src[0]; }; #undef ALIGNED_INSTRUCTION ///////////////////////////////////////////////////////////////////////// // Implements all the wellFormed methods ///////////////////////////////////////////////////////////////////////// /*! 
All Nary instruction registers must be of the same family and properly * defined (i.e. not out-of-bound) */ static INLINE bool checkRegisterData(RegisterFamily family, const Register &ID, const Function &fn, std::string &whyNot) { if (UNLIKELY(uint16_t(ID) >= fn.regNum())) { whyNot = "Out-of-bound destination register index"; return false; } const RegisterData reg = fn.getRegisterData(ID); if (UNLIKELY(reg.family != family)) { whyNot = "Destination family does not match instruction type"; return false; } return true; } /*! Special registers are *not* writeable */ static INLINE bool checkSpecialRegForWrite(const Register ®, const Function &fn, std::string &whyNot) { if (fn.isSpecialReg(reg) == true && reg != ir::ocl::stackptr) { whyNot = "Non stack pointer special registers are not writeable"; return false; } return true; } /*! We check that the given type belongs to the provided type family */ static INLINE bool checkTypeFamily(const Type &type, const Type *family, uint32_t typeNum, std::string &whyNot) { uint32_t typeID = 0; for (; typeID < typeNum; ++typeID) if (family[typeID] == type) break; if (typeID == typeNum) { whyNot = "Type is not supported by the instruction"; return false; } return true; } #define CHECK_TYPE(TYPE, FAMILY) \ do { \ if (UNLIKELY(checkTypeFamily(TYPE, FAMILY, FAMILY##Num, whyNot)) == false) \ return false; \ } while (0) static const Type madType[] = {TYPE_FLOAT}; static const uint32_t madTypeNum = ARRAY_ELEM_NUM(madType); // TODO add support for 64 bits values static const Type allButBool[] = {TYPE_S8, TYPE_U8, TYPE_S16, TYPE_U16, TYPE_S32, TYPE_U32, TYPE_S64, TYPE_U64, TYPE_FLOAT, TYPE_DOUBLE}; static const uint32_t allButBoolNum = ARRAY_ELEM_NUM(allButBool); // TODO add support for 64 bits values static const Type logicalType[] = {TYPE_S8, TYPE_U8, TYPE_S16, TYPE_U16, TYPE_S32, TYPE_U32, TYPE_S64, TYPE_U64, TYPE_BOOL}; static const uint32_t logicalTypeNum = ARRAY_ELEM_NUM(logicalType); // Unary and binary instructions share the same 
rules template INLINE bool NaryInstruction::wellFormed(const Function &fn, std::string &whyNot) const { const RegisterFamily family = getFamily(this->type); if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false)) return false; for (uint32_t srcID = 0; srcID < srcNum; ++srcID) if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false)) return false; // We actually support logical operations on boolean values for AND, OR, // and XOR switch (this->opcode) { case OP_OR: case OP_XOR: case OP_AND: CHECK_TYPE(this->type, logicalType); break; default: CHECK_TYPE(this->type, allButBool); break; case OP_POW: case OP_COS: case OP_SIN: case OP_RCP: case OP_ABS: case OP_RSQ: case OP_SQR: case OP_RNDD: case OP_RNDE: case OP_RNDU: case OP_RNDZ: const Type fp = TYPE_FLOAT; if (UNLIKELY(checkTypeFamily(TYPE_FLOAT, &fp, 1, whyNot)) == false) return false; break; } return true; } // First source must a boolean. Other must match the destination type INLINE bool SelectInstruction::wellFormed(const Function &fn, std::string &whyNot) const { const RegisterFamily family = getFamily(this->type); if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(src + 3u > fn.tupleNum())) { whyNot = "Out-of-bound index for ternary instruction"; return false; } const Register regID = fn.getRegister(src, 0); if (UNLIKELY(checkRegisterData(FAMILY_BOOL, regID, fn, whyNot) == false)) return false; for (uint32_t srcID = 1; srcID < 3; ++srcID) { const Register regID = fn.getRegister(src, srcID); if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false)) return false; } CHECK_TYPE(this->type, allButBool); return true; } // Pretty similar to binary instruction. 
Only the destination is of type // boolean INLINE bool CompareInstruction::wellFormed(const Function &fn, std::string &whyNot) const { if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(checkRegisterData(FAMILY_BOOL, dst[0], fn, whyNot) == false)) return false; const RegisterFamily family = getFamily(this->type); for (uint32_t srcID = 0; srcID < 2; ++srcID) if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false)) return false; CHECK_TYPE(this->type, allButBool); return true; } // We can convert anything to anything, but types and families must match INLINE bool ConvertInstruction::wellFormed(const Function &fn, std::string &whyNot) const { const RegisterFamily dstFamily = getFamily(dstType); const RegisterFamily srcFamily = getFamily(srcType); if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(checkRegisterData(dstFamily, dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(checkRegisterData(srcFamily, src[0], fn, whyNot) == false)) return false; CHECK_TYPE(this->dstType, allButBool); CHECK_TYPE(this->srcType, allButBool); return true; } // We can convert anything to anything, but types and families must match INLINE bool AtomicInstruction::wellFormed(const Function &fn, std::string &whyNot) const { if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst[0], fn, whyNot) == false)) return false; for (uint32_t srcID = 0; srcID < srcNum; ++srcID) if (UNLIKELY(checkRegisterData(FAMILY_DWORD, getSrc(fn, srcID), fn, whyNot) == false)) return false; return true; } INLINE bool TernaryInstruction::wellFormed(const Function &fn, std::string &whyNot) const { const RegisterFamily family = getFamily(this->type); if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false)) return false; if (UNLIKELY(src + 3u 
> fn.tupleNum())) { whyNot = "Out-of-bound index for ternary instruction"; return false; } for (uint32_t srcID = 0; srcID < 3; ++srcID) { const Register regID = fn.getRegister(src, srcID); if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false)) return false; } return true; } /*! Loads and stores follow the same restrictions */ template INLINE bool wellFormedLoadStore(const T &insn, const Function &fn, std::string &whyNot) { if (UNLIKELY(insn.offset >= fn.regNum())) { whyNot = "Out-of-bound offset register index"; return false; } if (UNLIKELY(insn.values + insn.valueNum > fn.tupleNum())) { whyNot = "Out-of-bound tuple index"; return false; } // Check all registers const RegisterFamily family = getFamily(insn.type); for (uint32_t valueID = 0; valueID < insn.valueNum; ++valueID) { const Register regID = fn.getRegister(insn.values, valueID); if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false)) return false; } CHECK_TYPE(insn.type, allButBool); return true; } INLINE bool LoadInstruction::wellFormed(const Function &fn, std::string &whyNot) const { const uint32_t dstNum = this->getDstNum(); for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const Register reg = this->getDst(fn, dstID); const bool isOK = checkSpecialRegForWrite(reg, fn, whyNot); if (UNLIKELY(isOK == false)) return false; } if (UNLIKELY(dstNum > Instruction::MAX_DST_NUM)) { whyNot = "Too many destinations for load instruction"; return false; } return wellFormedLoadStore(*this, fn, whyNot); } INLINE bool StoreInstruction::wellFormed(const Function &fn, std::string &whyNot) const { const uint32_t srcNum = this->getSrcNum(); if (UNLIKELY(srcNum > Instruction::MAX_SRC_NUM)) { whyNot = "Too many source for store instruction"; return false; } return wellFormedLoadStore(*this, fn, whyNot); } // TODO INLINE bool SampleInstruction::wellFormed(const Function &fn, std::string &why) const { return true; } INLINE bool TypedWriteInstruction::wellFormed(const Function &fn, std::string &why) 
const { return true; }

    /*! No check is implemented yet: every image info query is accepted */
    INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn, std::string &why) const
    { return true; }

    /*! No check is implemented yet: every sampler info query is accepted */
    INLINE bool GetSamplerInfoInstruction::wellFormed(const Function &fn, std::string &why) const
    { return true; }

    // Ensure that types and register family match
    /*! Check a load-immediate: the immediate index must be valid, the type of
     *  the immediate must equal the instruction type, and the destination
     *  must be writeable and of the matching register family.
     *  \return true when every check passes, false with whyNot set otherwise
     */
    INLINE bool LoadImmInstruction::wellFormed(const Function &fn, std::string &whyNot) const
    {
      // Bound check first: getImmediate is only called on a valid index
      if (UNLIKELY(immediateIndex >= fn.immediateNum())) {
        whyNot = "Out-of-bound immediate value index";
        return false;
      }
      const ir::Type immType = fn.getImmediate(immediateIndex).type;
      if (UNLIKELY(type != immType)) {
        whyNot = "Inconsistant type for the immediate value to load";
        return false;
      }
      const RegisterFamily family = getFamily(type);
      if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
        return false;
      if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
        return false;
      //Support all type IMM, disable check
      //CHECK_TYPE(this->type, allButBool);
      return true;
    }

    /*! Check a sync instruction: at least one flag must be set and no bit
     *  outside the known SYNC_* flags may be set.
     *  \return true when the parameters are valid, false with whyNot set
     *          otherwise
     */
    INLINE bool SyncInstruction::wellFormed(const Function &fn, std::string &whyNot) const
    {
      const uint32_t validMask = SYNC_WORKGROUP_EXEC |
                                 SYNC_LOCAL_READ_FENCE |
                                 SYNC_LOCAL_WRITE_FENCE |
                                 SYNC_GLOBAL_READ_FENCE |
                                 SYNC_GLOBAL_WRITE_FENCE;
      // Mask test rather than a magnitude comparison: it rejects unknown
      // bits whatever the numeric layout of the flags is
      if (UNLIKELY((this->parameters & ~validMask) != 0)) {
        whyNot = "Invalid parameters for sync instruction";
        return false;
      }
      if (UNLIKELY(this->parameters == 0)) {
        whyNot = "Missing parameters for sync instruction";
        return false;
      }
      return true;
    }

    // Only a label index is required
    /*! \return true when the label index is inside the label table of fn */
    INLINE bool LabelInstruction::wellFormed(const Function &fn, std::string &whyNot) const
    {
      if (UNLIKELY(labelIndex >= fn.labelNum())) {
        whyNot = "Out-of-bound label index";
        return false;
      }
      return true;
    }

    // The label must exist and the register must be of boolean family
    INLINE bool BranchInstruction::wellFormed(const Function &fn, std::string &whyNot) const
    {
      if (hasLabel)
        if (UNLIKELY(labelIndex >= fn.labelNum())) {
          whyNot = "Out-of-bound label index";
          return false;
        }
      if (hasPredicate)
        if
(UNLIKELY(checkRegisterData(FAMILY_BOOL, predicate, fn, whyNot) == false)) return false; return true; } #undef CHECK_TYPE ///////////////////////////////////////////////////////////////////////// // Implements all the output stream methods ///////////////////////////////////////////////////////////////////////// template INLINE void NaryInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << this->getType() << " %" << this->getDst(fn, 0); for (uint32_t i = 0; i < srcNum; ++i) out << " %" << this->getSrc(fn, i); } template static void ternaryOrSelectOut(const T &insn, std::ostream &out, const Function &fn) { insn.outOpcode(out); out << "." << insn.getType() << " %" << insn.getDst(fn, 0) << " %" << insn.getSrc(fn, 0) << " %" << insn.getSrc(fn, 1) << " %" << insn.getSrc(fn, 2); } INLINE void SelectInstruction::out(std::ostream &out, const Function &fn) const { ternaryOrSelectOut(*this, out, fn); } INLINE void TernaryInstruction::out(std::ostream &out, const Function &fn) const { ternaryOrSelectOut(*this, out, fn); } INLINE void AtomicInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << addrSpace; out << " %" << this->getDst(fn, 0); out << " {" << "%" << this->getSrc(fn, 0) << "}"; for (uint32_t i = 1; i < srcNum; ++i) out << " %" << this->getSrc(fn, i); } INLINE void ConvertInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << this->getDstType() << "." << this->getSrcType() << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 0); } INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << "aligned"; out << " {"; for (uint32_t i = 0; i < valueNum; ++i) out << "%" << this->getDst(fn, i) << (i != (valueNum-1u) ? 
" " : ""); out << "}"; out << " %" << this->getSrc(fn, 0); } INLINE void StoreInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << "aligned"; out << " %" << this->getSrc(fn, 0) << " {"; for (uint32_t i = 0; i < valueNum; ++i) out << "%" << this->getSrc(fn, i+1) << (i != (valueNum-1u) ? " " : ""); out << "}"; } INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << " $" << labelIndex; } INLINE void BranchInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); if (hasPredicate) out << "<%" << this->getSrc(fn, 0) << ">"; if (hasLabel) out << " -> label$" << labelIndex; } INLINE void LoadImmInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << type; out << " %" << this->getDst(fn,0) << " "; fn.outImmediate(out, immediateIndex); } static const char *syncStr[syncFieldNum] = { "workgroup", "local_read", "local_write", "global_read", "global_write" }; INLINE void SyncInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); for (uint32_t field = 0; field < syncFieldNum; ++field) if (this->parameters & (1 << field)) out << "." 
<< syncStr[field]; } } /* namespace internal */ std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace) { switch (addrSpace) { case MEM_GLOBAL: return out << "global"; case MEM_LOCAL: return out << "local"; case MEM_CONSTANT: return out << "constant"; case MEM_PRIVATE: return out << "private"; case IMAGE: return out << "image"; case MEM_INVALID: return out << "invalid"; }; return out; } /////////////////////////////////////////////////////////////////////////// // Implements the various introspection functions /////////////////////////////////////////////////////////////////////////// template struct HelperIntrospection { enum { value = 0 }; }; template struct HelperIntrospection { enum { value = 1 }; }; RegisterData Instruction::getDstData(uint32_t ID) const { const Function &fn = this->getFunction(); return fn.getRegisterData(this->getDst(ID)); } RegisterData Instruction::getSrcData(uint32_t ID) const { const Function &fn = this->getFunction(); return fn.getRegisterData(this->getSrc(ID)); } #define DECL_INSN(OPCODE, CLASS) \ case OP_##OPCODE: \ return HelperIntrospection::value == 1; #define START_INTROSPECTION(CLASS) \ static_assert(sizeof(internal::CLASS) == sizeof(uint64_t), \ "Bad instruction size"); \ static_assert(offsetof(internal::CLASS, opcode) == 0, \ "Bad opcode offset"); \ bool CLASS::isClassOf(const Instruction &insn) { \ const Opcode op = insn.getOpcode(); \ typedef CLASS RefClass; \ switch (op) { #define END_INTROSPECTION(CLASS) \ default: return false; \ }; \ } START_INTROSPECTION(UnaryInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(UnaryInstruction) START_INTROSPECTION(BinaryInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(BinaryInstruction) START_INTROSPECTION(CompareInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(CompareInstruction) START_INTROSPECTION(ConvertInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(ConvertInstruction) START_INTROSPECTION(AtomicInstruction) #include 
"ir/instruction.hxx" END_INTROSPECTION(AtomicInstruction) START_INTROSPECTION(SelectInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(SelectInstruction) START_INTROSPECTION(TernaryInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(TernaryInstruction) START_INTROSPECTION(BranchInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(BranchInstruction) START_INTROSPECTION(SampleInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(SampleInstruction) START_INTROSPECTION(TypedWriteInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(TypedWriteInstruction) START_INTROSPECTION(GetImageInfoInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(GetImageInfoInstruction) START_INTROSPECTION(GetSamplerInfoInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(GetSamplerInfoInstruction) START_INTROSPECTION(LoadImmInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(LoadImmInstruction) START_INTROSPECTION(LoadInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(LoadInstruction) START_INTROSPECTION(StoreInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(StoreInstruction) START_INTROSPECTION(SyncInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(SyncInstruction) START_INTROSPECTION(LabelInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(LabelInstruction) #undef END_INTROSPECTION #undef START_INTROSPECTION #undef DECL_INSN /////////////////////////////////////////////////////////////////////////// // Implements the function dispatching from public to internal with some // macro horrors /////////////////////////////////////////////////////////////////////////// #define DECL_INSN(OPCODE, CLASS) \ case OP_##OPCODE: return reinterpret_cast(this)->CALL; #define START_FUNCTION(CLASS, RET, PROTOTYPE) \ RET CLASS::PROTOTYPE const { \ const Opcode op = this->getOpcode(); \ switch (op) { #define END_FUNCTION(CLASS, RET) \ case OP_INVALID: return RET(); \ }; \ return RET(); \ } #define CALL 
getSrcNum() START_FUNCTION(Instruction, uint32_t, getSrcNum(void)) #include "ir/instruction.hxx" END_FUNCTION(Instruction, uint32_t) #undef CALL #define CALL getDstNum() START_FUNCTION(Instruction, uint32_t, getDstNum(void)) #include "ir/instruction.hxx" END_FUNCTION(Instruction, uint32_t) #undef CALL #undef DECL_INSN #define DECL_INSN(OPCODE, CLASS) \ case OP_##OPCODE: \ { \ const Function &fn = this->getFunction(); \ return reinterpret_cast(this)->CALL; \ } #define CALL wellFormed(fn, whyNot) START_FUNCTION(Instruction, bool, wellFormed(std::string &whyNot)) #include "ir/instruction.hxx" END_FUNCTION(Instruction, bool) #undef CALL #define CALL getDst(fn, ID) START_FUNCTION(Instruction, Register, getDst(uint32_t ID)) #include "ir/instruction.hxx" END_FUNCTION(Instruction, Register) #undef CALL #define CALL getSrc(fn, ID) START_FUNCTION(Instruction, Register, getSrc(uint32_t ID)) #include "ir/instruction.hxx" END_FUNCTION(Instruction, Register) #undef CALL #undef DECL_INSN #undef END_FUNCTION #undef START_FUNCTION void Instruction::setSrc(uint32_t srcID, Register reg) { Function &fn = this->getFunction(); #if GBE_DEBUG const RegisterData oldData = this->getSrcData(srcID); const RegisterData newData = fn.getRegisterData(reg); GBE_ASSERT(oldData.family == newData.family); #endif /* GBE_DEBUG */ const Opcode op = this->getOpcode(); switch (op) { #define DECL_INSN(OP, FAMILY)\ case OP_##OP:\ reinterpret_cast(this)->setSrc(fn, srcID, reg);\ break; #include "instruction.hxx" #undef DECL_INSN case OP_INVALID: NOT_SUPPORTED; break; }; } void Instruction::setDst(uint32_t dstID, Register reg) { Function &fn = this->getFunction(); #if GBE_DEBUG const RegisterData oldData = this->getDstData(dstID); const RegisterData newData = fn.getRegisterData(reg); GBE_ASSERT(oldData.family == newData.family); #endif /* GBE_DEBUG */ const Opcode op = this->getOpcode(); switch (op) { #define DECL_INSN(OP, FAMILY)\ case OP_##OP:\ reinterpret_cast(this)->setDst(fn, dstID, reg);\ break; 
#include "instruction.hxx" #undef DECL_INSN case OP_INVALID: NOT_SUPPORTED; break; }; } const Function &Instruction::getFunction(void) const { const BasicBlock *bb = this->getParent(); GBE_ASSERT(bb != NULL); return bb->getParent(); } Function &Instruction::getFunction(void) { BasicBlock *bb = this->getParent(); GBE_ASSERT(bb != NULL); return bb->getParent(); } void Instruction::replace(Instruction *other) const { Function &fn = other->getFunction(); Instruction *insn = fn.newInstruction(*this); intrusive_list_node *prev = other->prev; insn->parent = other->parent; other->remove(); append(insn, prev); } void Instruction::remove(void) { Function &fn = this->getFunction(); unlink(this); fn.deleteInstruction(this); } bool Instruction::hasSideEffect(void) const { return opcode == OP_STORE || opcode == OP_TYPED_WRITE || opcode == OP_SYNC || opcode == OP_ATOMIC; } #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \ RET CLASS::PROTOTYPE const { \ return reinterpret_cast(this)->CALL; \ } DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes()) DECL_MEM_FN(SelectInstruction, Type, getType(void), getType()) DECL_MEM_FN(TernaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(CompareInstruction, Type, getType(void), getType()) DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType()) DECL_MEM_FN(AtomicInstruction, AddressSpace, getAddressSpace(void), getAddressSpace()) DECL_MEM_FN(AtomicInstruction, AtomicOps, getAtomicOpcode(void), getAtomicOpcode()) DECL_MEM_FN(StoreInstruction, Type, getValueType(void), getValueType()) DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), getValueNum()) DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), getAddressSpace()) DECL_MEM_FN(StoreInstruction, bool, isAligned(void), isAligned()) 
DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType()) DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum()) DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace()) DECL_MEM_FN(LoadInstruction, bool, isAligned(void), isAligned()) DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType()) DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated()) DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters()) DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType()) DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void), getCoordType()) DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) #undef DECL_MEM_FN Immediate LoadImmInstruction::getImmediate(void) const { const Function &fn = this->getFunction(); return reinterpret_cast(this)->getImmediate(fn); } /////////////////////////////////////////////////////////////////////////// // Implements the emission functions /////////////////////////////////////////////////////////////////////////// // For all unary functions with given opcode Instruction ALU1(Opcode opcode, Type type, Register dst, Register src) { return internal::UnaryInstruction(opcode, type, dst, src).convert(); } // All unary functions #define DECL_EMIT_FUNCTION(NAME) \ Instruction NAME(Type type, Register dst, Register src) { \ return ALU1(OP_##NAME, type, dst, src);\ } DECL_EMIT_FUNCTION(MOV) DECL_EMIT_FUNCTION(FBH) DECL_EMIT_FUNCTION(FBL) DECL_EMIT_FUNCTION(COS) DECL_EMIT_FUNCTION(SIN) DECL_EMIT_FUNCTION(LOG) DECL_EMIT_FUNCTION(SQR) DECL_EMIT_FUNCTION(RSQ) DECL_EMIT_FUNCTION(RNDD) DECL_EMIT_FUNCTION(RNDE) 
DECL_EMIT_FUNCTION(RNDU) DECL_EMIT_FUNCTION(RNDZ) #undef DECL_EMIT_FUNCTION // All binary functions #define DECL_EMIT_FUNCTION(NAME) \ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \ return internal::BinaryInstruction(OP_##NAME, type, dst, src0, src1).convert(); \ } DECL_EMIT_FUNCTION(POW) DECL_EMIT_FUNCTION(MUL) DECL_EMIT_FUNCTION(ADD) DECL_EMIT_FUNCTION(ADDSAT) DECL_EMIT_FUNCTION(SUB) DECL_EMIT_FUNCTION(SUBSAT) DECL_EMIT_FUNCTION(MUL_HI) DECL_EMIT_FUNCTION(I64_MUL_HI) DECL_EMIT_FUNCTION(UPSAMPLE_SHORT) DECL_EMIT_FUNCTION(UPSAMPLE_INT) DECL_EMIT_FUNCTION(UPSAMPLE_LONG) DECL_EMIT_FUNCTION(DIV) DECL_EMIT_FUNCTION(REM) DECL_EMIT_FUNCTION(SHL) DECL_EMIT_FUNCTION(SHR) DECL_EMIT_FUNCTION(ASR) DECL_EMIT_FUNCTION(BSF) DECL_EMIT_FUNCTION(BSB) DECL_EMIT_FUNCTION(OR) DECL_EMIT_FUNCTION(XOR) DECL_EMIT_FUNCTION(AND) DECL_EMIT_FUNCTION(HADD) DECL_EMIT_FUNCTION(RHADD) DECL_EMIT_FUNCTION(I64HADD) DECL_EMIT_FUNCTION(I64RHADD) #undef DECL_EMIT_FUNCTION // SEL Instruction SEL(Type type, Register dst, Tuple src) { return internal::SelectInstruction(type, dst, src).convert(); } Instruction I64MADSAT(Type type, Register dst, Tuple src) { return internal::TernaryInstruction(OP_I64MADSAT, type, dst, src).convert(); } // All compare functions #define DECL_EMIT_FUNCTION(NAME) \ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \ const internal::CompareInstruction insn(OP_##NAME, type, dst, src0, src1); \ return insn.convert(); \ } DECL_EMIT_FUNCTION(EQ) DECL_EMIT_FUNCTION(NE) DECL_EMIT_FUNCTION(LE) DECL_EMIT_FUNCTION(LT) DECL_EMIT_FUNCTION(GE) DECL_EMIT_FUNCTION(GT) #undef DECL_EMIT_FUNCTION // CVT Instruction CVT(Type dstType, Type srcType, Register dst, Register src) { return internal::ConvertInstruction(dstType, srcType, dst, src).convert(); } // saturated convert Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src) { return internal::ConvertInstruction(dstType, srcType, dst, src, true).convert(); } // For all 
unary functions with given opcode Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, Tuple src) { return internal::AtomicInstruction(atomicOp, dst, space, src).convert(); } // BRA Instruction BRA(LabelIndex labelIndex) { return internal::BranchInstruction(OP_BRA, labelIndex).convert(); } Instruction BRA(LabelIndex labelIndex, Register pred) { return internal::BranchInstruction(OP_BRA, labelIndex, pred).convert(); } // RET Instruction RET(void) { return internal::BranchInstruction(OP_RET).convert(); } // LOADI Instruction LOADI(Type type, Register dst, ImmediateIndex value) { return internal::LoadImmInstruction(type, dst, value).convert(); } // LOAD and STORE #define DECL_EMIT_FUNCTION(NAME, CLASS) \ Instruction NAME(Type type, \ Tuple tuple, \ Register offset, \ AddressSpace space, \ uint32_t valueNum, \ bool dwAligned) \ { \ return internal::CLASS(type,tuple,offset,space,valueNum,dwAligned).convert(); \ } DECL_EMIT_FUNCTION(LOAD, LoadInstruction) DECL_EMIT_FUNCTION(STORE, StoreInstruction) #undef DECL_EMIT_FUNCTION // FENCE Instruction SYNC(uint32_t parameters) { return internal::SyncInstruction(parameters).convert(); } // LABEL Instruction LABEL(LabelIndex labelIndex) { return internal::LabelInstruction(labelIndex).convert(); } // SAMPLE Instruction SAMPLE(Tuple dst, Tuple src, Type dstType, Type srcType) { return internal::SampleInstruction(dst, src, dstType, srcType).convert(); } Instruction TYPED_WRITE(Tuple src, Type srcType, Type coordType) { return internal::TypedWriteInstruction(src, srcType, coordType).convert(); } Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg) { return internal::GetImageInfoInstruction(infoType, dst, src, infoReg).convert(); } Instruction GET_SAMPLER_INFO(Register dst, Register src) { return internal::GetSamplerInfoInstruction(dst, src).convert(); } std::ostream &operator<< (std::ostream &out, const Instruction &insn) { const Function &fn = insn.getFunction(); switch 
(insn.getOpcode()) { #define DECL_INSN(OPCODE, CLASS) \ case OP_##OPCODE: \ reinterpret_cast(insn).out(out, fn); \ break; #include "instruction.hxx" #undef DECL_INSN case OP_INVALID: NOT_SUPPORTED; break; }; return out; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/instruction.hpp000066400000000000000000000615501223142177000210740ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file instruction.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_INSTRUCTION_HPP__ #define __GBE_IR_INSTRUCTION_HPP__ #include "ir/register.hpp" #include "ir/immediate.hpp" #include "ir/type.hpp" #include "sys/platform.hpp" #include "sys/intrusive_list.hpp" #include namespace gbe { namespace ir { /*! All opcodes */ enum Opcode : uint8_t { #define DECL_INSN(INSN, FAMILY) OP_##INSN, #include "ir/instruction.hxx" #undef DECL_INSN OP_INVALID }; /*! Different memory spaces */ enum AddressSpace : uint8_t { MEM_GLOBAL = 0, //!< Global memory (a la OCL) MEM_LOCAL, //!< Local memory (thread group memory) MEM_CONSTANT, //!< Immutable global memory MEM_PRIVATE, //!< Per thread private memory IMAGE, //!< For texture image. 
MEM_INVALID }; enum AtomicOps { ATOMIC_OP_AND = 1, ATOMIC_OP_OR = 2, ATOMIC_OP_XOR = 3, ATOMIC_OP_XCHG = 4, ATOMIC_OP_INC = 5, ATOMIC_OP_DEC = 6, ATOMIC_OP_ADD = 7, ATOMIC_OP_SUB = 8, /* NOTE(review): value 9 is skipped here - confirm whether this is intentional */ ATOMIC_OP_IMAX = 10, ATOMIC_OP_IMIN = 11, ATOMIC_OP_UMAX = 12, ATOMIC_OP_UMIN = 13, ATOMIC_OP_CMPXCHG = 14, ATOMIC_OP_INVALID }; /* Vote function per hardware thread */ enum VotePredicate : uint8_t { VOTE_ALL = 0, VOTE_ANY }; /*! Output the memory space */ std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace); /*! A label is identified with an unsigned short */ TYPE_SAFE(LabelIndex, uint16_t) /*! Function class contains the register file and the register tuple. Any * information related to the registers may therefore require a function */ class Function; /*! Contains the stream of instructions */ class BasicBlock; /////////////////////////////////////////////////////////////////////////// /// All public instruction classes as manipulated by all public classes /////////////////////////////////////////////////////////////////////////// /*! Stores instruction internal data and opcode */ class ALIGNED(sizeof(uint64_t)) InstructionBase { public: /*! Initialize the instruction from an 8-byte stream: byte 0 is the opcode, * the following opaqueSize bytes are copied into the opaque payload */ INLINE InstructionBase(const char *stream) { opcode = Opcode(stream[0]); for (uint32_t byte = 0; byte < opaqueSize; ++byte) opaque[byte] = stream[byte+1]; } /*! Uninitialized instruction */ INLINE InstructionBase(void) {} /*! Get the instruction opcode */ INLINE Opcode getOpcode(void) const { return opcode; } protected: enum { opaqueSize = sizeof(uint64_t)-sizeof(uint8_t) }; Opcode opcode; //!< Identifies the instruction char opaque[opaqueSize]; //!< Remainder of it GBE_CLASS(InstructionBase); //!< Use internal allocators }; /*! Store the instruction description in 32 bytes */ class Instruction : public InstructionBase, public intrusive_list_node { public: /*!
Initialize the instruction from a 8 bytes stream */ INLINE Instruction(const char *stream) : InstructionBase(stream) { parent = NULL; } /*! Copy the private fields and give it the same parent */ INLINE Instruction(const Instruction &other) : InstructionBase(reinterpret_cast(&other.opcode)) { parent = other.parent; } private: /*! Private no-op assignment: nothing is copied and *this is returned * unchanged, so instructions cannot be overwritten through operator= */ INLINE Instruction &operator= (const Instruction &other) { return *this; } public: /*! Nothing to do here */ INLINE ~Instruction(void) {} /*! Uninitialized instruction */ INLINE Instruction(void) {} /*! Get the number of sources for this instruction */ uint32_t getSrcNum(void) const; /*! Get the number of destinations for this instruction */ uint32_t getDstNum(void) const; /*! Get the register index of the given source */ Register getSrc(uint32_t ID = 0u) const; /*! Get the register index of the given destination */ Register getDst(uint32_t ID = 0u) const; /*! Get the register data of the given destination */ RegisterData getDstData(uint32_t ID = 0u) const; /*! Get the register data of the given source */ RegisterData getSrcData(uint32_t ID = 0u) const; /*! Set a register in src srcID */ void setSrc(uint32_t srcID, Register reg); /*! Set a register in dst dstID */ void setDst(uint32_t dstID, Register reg); /*! Is there any side effect in the memory sub-system? */ bool hasSideEffect(void) const; /*! Get / set the parent basic block */ BasicBlock *getParent(void) { return parent; } const BasicBlock *getParent(void) const { return parent; } void setParent(BasicBlock *block) { this->parent = block; } /*! Get the function from the parent basic block */ const Function &getFunction(void) const; Function &getFunction(void); /*! Check that the instruction is well formed (types properly match, * registers not out of bounds and so on). If not well formed, provide a * reason in the string why */ bool wellFormed(std::string &why) const; /*! Replace other by this instruction */ void replace(Instruction *other) const; /*!
Remove the instruction from the instruction stream */ void remove(void); /*! Indicates if the instruction belongs to instruction type T. Typically, T * can be BinaryInstruction, UnaryInstruction, LoadInstruction and so on */ template INLINE bool isMemberOf(void) const { return T::isClassOf(*this); } static const uint32_t MAX_SRC_NUM = 8; static const uint32_t MAX_DST_NUM = 8; protected: BasicBlock *parent; //!< The basic block containing the instruction GBE_CLASS(Instruction); //!< Use internal allocators }; /*! Output the instruction string in the given stream */ std::ostream &operator<< (std::ostream &out, const Instruction &proxy); /*! Unary instructions are typed. dst and sources share the same type */ class UnaryInstruction : public Instruction { public: /*! Get the type manipulated by the instruction */ Type getType(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Binary instructions are typed. dst and sources share the same type */ class BinaryInstruction : public Instruction { public: /*! Get the type manipulated by the instruction */ Type getType(void) const; /*! Commutative instructions can allow better optimizations */ bool commutes(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Ternary instructions are typed. dst and sources share the same type */ class TernaryInstruction : public Instruction { public: Type getType(void) const; static bool isClassOf(const Instruction &insn); }; /*! Select instructions writes src0 to dst if cond is true. Otherwise, it * writes src1 */ class SelectInstruction : public Instruction { public: /*! Predicate is in slot 0. So first source to selec is in slot 1 */ static const uint32_t src0Index = 1; /*! Second source to select is in slot 2 */ static const uint32_t src1Index = 2; /*! 
Get the predicate of the selection instruction */ INLINE Register getPredicate(void) const { return this->getSrc(0); } /*! Get the type of both sources */ Type getType(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Compare instructions compare anything from the same type and return a * boolean value */ class CompareInstruction : public Instruction { public: /*! Get the type of the source registers */ Type getType(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Conversion instruction converts from one type to another */ class ConvertInstruction : public Instruction { public: /*! Get the type of the source */ Type getSrcType(void) const; /*! Get the type of the destination */ Type getDstType(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Atomic instruction */ class AtomicInstruction : public Instruction { public: /*! Where the address register goes */ static const uint32_t addressIndex = 0; /*! Address space that is manipulated here */ AddressSpace getAddressSpace(void) const; /*! Return the atomic function code */ AtomicOps getAtomicOpcode(void) const; /*! Return the register that contains the addresses */ INLINE Register getAddress(void) const { return this->getSrc(addressIndex); } /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Store instruction. First source is the address. Next sources are the * values to store contiguously at the given address */ class StoreInstruction : public Instruction { public: /*! Where the address register goes */ static const uint32_t addressIndex = 0; /*! Return the types of the values to store */ Type getValueType(void) const; /*! 
Give the number of values the instruction is storing (srcNum-1) */ uint32_t getValueNum(void) const; /*! Address space that is manipulated here */ AddressSpace getAddressSpace(void) const; /*! DWORD aligned means untyped read for Gen. That is what matters */ bool isAligned(void) const; /*! Return the register that contains the addresses */ INLINE Register getAddress(void) const { return this->getSrc(addressIndex); } /*! Return the register that contain value valueID */ INLINE Register getValue(uint32_t valueID) const { GBE_ASSERT(valueID < this->getValueNum()); return this->getSrc(valueID + 1u); } /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Load instruction. The source is simply the address where to get the data. * The multiple destinations are the contiguous values loaded at the given * address */ class LoadInstruction : public Instruction { public: /*! Type of the loaded values (ie type of all the destinations) */ Type getValueType(void) const; /*! Number of values loaded (ie number of destinations) */ uint32_t getValueNum(void) const; /*! Address space that is manipulated here */ AddressSpace getAddressSpace(void) const; /*! DWORD aligned means untyped read for Gen. That is what matters */ bool isAligned(void) const; /*! Return the register that contains the addresses */ INLINE Register getAddress(void) const { return this->getSrc(0u); } /*! Return the register that contain value valueID */ INLINE Register getValue(uint32_t valueID) const { return this->getDst(valueID); } /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Load immediate instruction loads an typed immediate value into the given * register. Since double and uint64_t values will not fit into an * instruction, the immediate themselves are stored in the function core. 
* Contrary to regular load instructions, there is only one destination * possible */ class LoadImmInstruction : public Instruction { public: /*! Return the value stored in the instruction */ Immediate getImmediate(void) const; /*! Return the type of the stored value */ Type getType(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Store data in a texture */ class TypedWriteInstruction : public Instruction { public: enum { SURFACE_BTI = 0 }; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); Type getSrcType(void) const; Type getCoordType(void) const; }; /*! Load texels from a texture */ class SampleInstruction : public Instruction { public: enum { SURFACE_BTI = 0, SAMPLER_BTI = 1 }; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); Type getSrcType(void) const; Type getDstType(void) const; }; /*! Image index and information type, also viewable as one 32-bit word * through data */ typedef union { struct { uint8_t index; /*! the allocated image index */ uint8_t type; /*! the information type */ }; uint32_t data; } ImageInfoKey; /*! Get image information */ class GetImageInfoInstruction : public Instruction { public: enum { SURFACE_BTI = 0 }; enum { WIDTH = 0, HEIGHT = 1, DEPTH = 2, CHANNEL_DATA_TYPE = 3, CHANNEL_ORDER = 4, }; /*! Number of destinations for the given info type: 1 for every known * type; any other value asserts and yields 0 */ static INLINE uint32_t getDstNum4Type(int infoType) { switch (infoType) { case WIDTH: case HEIGHT: case DEPTH: case CHANNEL_DATA_TYPE: case CHANNEL_ORDER: return 1; break; default: GBE_ASSERT(0); } return 0; } uint32_t getInfoType() const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Get sampler information */ class GetSamplerInfoInstruction : public Instruction { public: /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*!
Branch instruction is the unified way to branch (with or without * predicate) */ class BranchInstruction : public Instruction { public: /*! Indicate if the branch is predicated */ bool isPredicated(void) const; /*! Return the predicate register (if predicated) */ RegisterData getPredicate(void) const { GBE_ASSERTM(this->isPredicated() == true, "Branch is not predicated"); return this->getSrcData(0); } /*! Return the predicate register index (if predicated) */ Register getPredicateIndex(void) const { GBE_ASSERTM(this->isPredicated() == true, "Branch is not predicated"); return this->getSrc(0); } /*! Return the label index pointed by the branch */ LabelIndex getLabelIndex(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Label instruction are actual no-op but are referenced by branches as their * targets */ class LabelInstruction : public Instruction { public: /*! Return the label index of the instruction */ LabelIndex getLabelIndex(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Texture instruction are used for any texture mapping requests */ class TextureInstruction : public Instruction { public: /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Mapped to OpenCL (mem_fence, read_mem_fence, write_mem_fence, barrier) */ enum { SYNC_WORKGROUP_EXEC = 1<<0, SYNC_LOCAL_READ_FENCE = 1<<1, SYNC_LOCAL_WRITE_FENCE = 1<<2, SYNC_GLOBAL_READ_FENCE = 1<<3, SYNC_GLOBAL_WRITE_FENCE = 1<<4, SYNC_INVALID = 1<<5 }; /*! 5 bits to encode all possible synchronization capablities */ static const uint32_t syncFieldNum = 5u; /*! When barrier(CLK_LOCAL_MEM_FENCE) is issued */ static const uint32_t syncLocalBarrier = SYNC_WORKGROUP_EXEC |SYNC_LOCAL_WRITE_FENCE | SYNC_LOCAL_READ_FENCE; /*! 
When barrier(CLK_GLOBAL_MEM_FENCE) is issued */ static const uint32_t syncGlobalBarrier = SYNC_WORKGROUP_EXEC | SYNC_GLOBAL_WRITE_FENCE | SYNC_GLOBAL_READ_FENCE; /*! Sync instructions are used to order loads and stores for a given memory * space and/or to serialize threads at a given point in the program */ class SyncInstruction : public Instruction { public: /*! Get the parameters (bitfields) of the sync instructions (see above) */ uint32_t getParameters(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; /*! Specialize the instruction. Also performs typechecking first through * isMemberOf. The pointer overloads return NULL when the check fails; the * reference overloads assert instead */ template INLINE T *cast(Instruction *insn) { if(insn->isMemberOf()) return reinterpret_cast(insn); else return NULL; } template INLINE const T *cast(const Instruction *insn) { if(insn->isMemberOf()) return reinterpret_cast(insn); else return NULL; } template INLINE T &cast(Instruction &insn) { GBE_ASSERTM(insn.isMemberOf() == true, "Invalid instruction type"); return reinterpret_cast(insn); } template INLINE const T &cast(const Instruction &insn) { GBE_ASSERTM(insn.isMemberOf() == true, "Invalid instruction type"); return reinterpret_cast(insn); } /*! Indicates if the given opcode belongs to the given instruction family */ template struct EqualType {enum {value = false};}; template struct EqualType { enum {value = true};}; template INLINE bool isOpcodeFrom(Opcode op) { switch (op) { #define DECL_INSN(OPCODE, FAMILY) \ case OP_##OPCODE: return EqualType::value; #include "instruction.hxx" #undef DECL_INSN default: NOT_SUPPORTED; return false; } } /////////////////////////////////////////////////////////////////////////// /// All emission functions /////////////////////////////////////////////////////////////////////////// /*! alu1.type dst src */ Instruction ALU1(Opcode opcode, Type type, Register dst, Register src); /*!
mov.type dst src */ Instruction MOV(Type type, Register dst, Register src); /*! cos.type dst src */ Instruction COS(Type type, Register dst, Register src); /*! sin.type dst src */ Instruction SIN(Type type, Register dst, Register src); /*! mul_hi.type dst src0 src1 */ Instruction MUL_HI(Type type, Register dst, Register src0, Register src1); /*! i64_mul_hi.type dst src0 src1 */ Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1); /*! i64madsat.type dst src (src is a tuple of sources) */ Instruction I64MADSAT(Type type, Register dst, Tuple src); /*! upsample_short.type dst src0 src1 */ Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1); /*! upsample_int.type dst src0 src1 */ Instruction UPSAMPLE_INT(Type type, Register dst, Register src0, Register src1); /*! upsample_long.type dst src0 src1 */ Instruction UPSAMPLE_LONG(Type type, Register dst, Register src0, Register src1); /*! fbh.type dst src */ Instruction FBH(Type type, Register dst, Register src); /*! fbl.type dst src */ Instruction FBL(Type type, Register dst, Register src); /*! hadd.type dst src0 src1 */ Instruction HADD(Type type, Register dst, Register src0, Register src1); /*! rhadd.type dst src0 src1 */ Instruction RHADD(Type type, Register dst, Register src0, Register src1); /*! i64hadd.type dst src0 src1 */ Instruction I64HADD(Type type, Register dst, Register src0, Register src1); /*! i64rhadd.type dst src0 src1 */ Instruction I64RHADD(Type type, Register dst, Register src0, Register src1); /*! rcp.type dst src */ Instruction RCP(Type type, Register dst, Register src); /*! abs.type dst src */ Instruction ABS(Type type, Register dst, Register src); /*! log.type dst src */ Instruction LOG(Type type, Register dst, Register src); /*! sqr.type dst src */ Instruction SQR(Type type, Register dst, Register src); /*! rsq.type dst src */ Instruction RSQ(Type type, Register dst, Register src); /*! rndd.type dst src */ Instruction RNDD(Type type, Register dst, Register src); /*!
rnde.type dst src */ Instruction RNDE(Type type, Register dst, Register src); /*! rndu.type dst src */ Instruction RNDU(Type type, Register dst, Register src); /*! rndz.type dst src */ Instruction RNDZ(Type type, Register dst, Register src); /*! pow.type dst src0 src1 */ Instruction POW(Type type, Register dst, Register src0, Register src1); /*! mul.type dst src0 src1 */ Instruction MUL(Type type, Register dst, Register src0, Register src1); /*! add.type dst src0 src1 */ Instruction ADD(Type type, Register dst, Register src0, Register src1); /*! addsat.type dst src0 src1 */ Instruction ADDSAT(Type type, Register dst, Register src0, Register src1); /*! sub.type dst src0 src1 */ Instruction SUB(Type type, Register dst, Register src0, Register src1); /*! subsat.type dst src0 src1 */ Instruction SUBSAT(Type type, Register dst, Register src0, Register src1); /*! div.type dst src0 src1 */ Instruction DIV(Type type, Register dst, Register src0, Register src1); /*! rem.type dst src0 src1 */ Instruction REM(Type type, Register dst, Register src0, Register src1); /*! shl.type dst src0 src1 */ Instruction SHL(Type type, Register dst, Register src0, Register src1); /*! shr.type dst src0 src1 */ Instruction SHR(Type type, Register dst, Register src0, Register src1); /*! asr.type dst src0 src1 */ Instruction ASR(Type type, Register dst, Register src0, Register src1); /*! bsf.type dst src0 src1 */ Instruction BSF(Type type, Register dst, Register src0, Register src1); /*! bsb.type dst src0 src1 */ Instruction BSB(Type type, Register dst, Register src0, Register src1); /*! or.type dst src0 src1 */ Instruction OR(Type type, Register dst, Register src0, Register src1); /*! xor.type dst src0 src1 */ Instruction XOR(Type type, Register dst, Register src0, Register src1); /*! and.type dst src0 src1 */ Instruction AND(Type type, Register dst, Register src0, Register src1); /*! sel.type dst {cond, src0, src1} (== src) */ Instruction SEL(Type type, Register dst, Tuple src); /*! 
eq.type dst src0 src1 */ Instruction EQ(Type type, Register dst, Register src0, Register src1); /*! ne.type dst src0 src1 */ Instruction NE(Type type, Register dst, Register src0, Register src1); /*! le.type dst src0 src1 */ Instruction LE(Type type, Register dst, Register src0, Register src1); /*! lt.type dst src0 src1 */ Instruction LT(Type type, Register dst, Register src0, Register src1); /*! ge.type dst src0 src1 */ Instruction GE(Type type, Register dst, Register src0, Register src1); /*! gt.type dst src0 src1 */ Instruction GT(Type type, Register dst, Register src0, Register src1); /*! cvt.{dstType <- srcType} dst src */ Instruction CVT(Type dstType, Type srcType, Register dst, Register src); /*! sat_cvt.{dstType <- srcType} dst src */ Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src); /*! atomic dst addr.space {src1 {src2}} */ Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, Tuple src); /*! bra labelIndex */ Instruction BRA(LabelIndex labelIndex); /*! (pred) bra labelIndex */ Instruction BRA(LabelIndex labelIndex, Register pred); /*! ret */ Instruction RET(void); /*! load.type.space {dst1,...,dst_valueNum} offset value */ Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned); /*! store.type.space offset {src1,...,src_valueNum} value */ Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned); /*! loadi.type dst value */ Instruction LOADI(Type type, Register dst, ImmediateIndex value); /*! sync.params... (see Sync instruction) */ Instruction SYNC(uint32_t parameters); /*! typed write */ Instruction TYPED_WRITE(Tuple src, Type srcType, Type coordType); /*! sample textures */ Instruction SAMPLE(Tuple dst, Tuple src, Type dstType, Type srcType); /*! get image information, such as width/height/depth/... */ Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg); /*!
get sampler information */ Instruction GET_SAMPLER_INFO(Register dst, Register src); /*! label labelIndex */ Instruction LABEL(LabelIndex labelIndex); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_INSTRUCTION_HPP__ */ Release_v0.3/backend/src/ir/instruction.hxx000066400000000000000000000065551223142177000211200ustar00rootroot00000000000000/* * Copyright 2012 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ /** * \file instruction.hxx * \author Benjamin Segovia */ DECL_INSN(MOV, UnaryInstruction) DECL_INSN(COS, UnaryInstruction) DECL_INSN(SIN, UnaryInstruction) DECL_INSN(LOG, UnaryInstruction) DECL_INSN(SQR, UnaryInstruction) DECL_INSN(RSQ, UnaryInstruction) DECL_INSN(RCP, UnaryInstruction) DECL_INSN(ABS, UnaryInstruction) DECL_INSN(RNDD, UnaryInstruction) DECL_INSN(RNDE, UnaryInstruction) DECL_INSN(RNDU, UnaryInstruction) DECL_INSN(RNDZ, UnaryInstruction) DECL_INSN(POW, BinaryInstruction) DECL_INSN(MUL, BinaryInstruction) DECL_INSN(ADD, BinaryInstruction) DECL_INSN(ADDSAT, BinaryInstruction) DECL_INSN(SUB, BinaryInstruction) DECL_INSN(SUBSAT, BinaryInstruction) DECL_INSN(DIV, BinaryInstruction) DECL_INSN(REM, BinaryInstruction) DECL_INSN(SHL, BinaryInstruction) DECL_INSN(SHR, BinaryInstruction) DECL_INSN(ASR, BinaryInstruction) DECL_INSN(BSF, BinaryInstruction) DECL_INSN(BSB, BinaryInstruction) DECL_INSN(OR, BinaryInstruction) DECL_INSN(XOR, BinaryInstruction) DECL_INSN(AND, BinaryInstruction) DECL_INSN(SEL, SelectInstruction) DECL_INSN(EQ, CompareInstruction) DECL_INSN(NE, CompareInstruction) DECL_INSN(LE, CompareInstruction) DECL_INSN(LT, CompareInstruction) DECL_INSN(GE, CompareInstruction) DECL_INSN(GT, CompareInstruction) DECL_INSN(CVT, ConvertInstruction) DECL_INSN(SAT_CVT, ConvertInstruction) DECL_INSN(ATOMIC, AtomicInstruction) DECL_INSN(BRA, BranchInstruction) DECL_INSN(RET, BranchInstruction) DECL_INSN(LOADI, LoadImmInstruction) DECL_INSN(LOAD, LoadInstruction) DECL_INSN(STORE, StoreInstruction) DECL_INSN(TYPED_WRITE, TypedWriteInstruction) DECL_INSN(SAMPLE, SampleInstruction) DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL, LabelInstruction) DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_INSN(GET_SAMPLER_INFO, GetSamplerInfoInstruction) DECL_INSN(MUL_HI, BinaryInstruction) DECL_INSN(I64_MUL_HI, BinaryInstruction) DECL_INSN(FBH, UnaryInstruction) DECL_INSN(FBL, UnaryInstruction) DECL_INSN(HADD, BinaryInstruction) DECL_INSN(RHADD, 
BinaryInstruction) DECL_INSN(I64HADD, BinaryInstruction) DECL_INSN(I64RHADD, BinaryInstruction) DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction) DECL_INSN(UPSAMPLE_INT, BinaryInstruction) DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) DECL_INSN(I64MADSAT, TernaryInstruction) Release_v0.3/backend/src/ir/liveness.cpp000066400000000000000000000071341223142177000203340ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file liveness.cpp * \author Benjamin Segovia */ #include "ir/liveness.hpp" #include namespace gbe { namespace ir { Liveness::Liveness(Function &fn) : fn(fn) { // Initialize UEVar and VarKill for each block fn.foreachBlock([this](const BasicBlock &bb) { this->initBlock(bb); }); // Now with iterative analysis, we compute liveout sets this->computeLiveOut(); } Liveness::~Liveness(void) { for (auto &pair : liveness) GBE_SAFE_DELETE(pair.second); } void Liveness::initBlock(const BasicBlock &bb) { GBE_ASSERT(liveness.contains(&bb) == false); BlockInfo *info = GBE_NEW(BlockInfo, bb); // Traverse all instructions to handle UEVar and VarKill const_cast(bb).foreach([this, info](const Instruction &insn) { this->initInstruction(*info, insn); }); liveness[&bb] = info; } void Liveness::initInstruction(BlockInfo &info, const Instruction &insn) { const uint32_t srcNum = insn.getSrcNum(); const uint32_t dstNum = insn.getDstNum(); // First look for used before killed for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const Register reg = insn.getSrc(srcID); // Not killed -> it is really an upward use if (info.varKill.contains(reg) == false) info.upwardUsed.insert(reg); } // A destination is a killed value for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const Register reg = insn.getDst(dstID); info.varKill.insert(reg); } } void Liveness::computeLiveOut(void) { // First insert the UEVar from the successors foreach([](BlockInfo &info, const BlockInfo &succ) { const UEVar &ueVarSet = succ.upwardUsed; // Iterate over all the registers in the UEVar of our successor for (auto ueVar : ueVarSet) info.liveOut.insert(ueVar); }); // Now iterate on liveOut until no set changes any more bool changed = true; while (changed) { changed = false; foreach([&changed](BlockInfo &info, const BlockInfo &succ) { const UEVar &killSet = succ.varKill; const LiveOut &liveOut = succ.liveOut; // Propagate every register in the liveOut of our successor that the successor does not kill for (auto living : liveOut) { if
(killSet.contains(living)) continue; if (info.liveOut.contains(living)) continue; info.liveOut.insert(living); changed = true; } }); } } /*! To pretty print the livfeness info */ static const uint32_t prettyInsnStrSize = 48; static const uint32_t prettyRegStrSize = 5; /*! Describe how the register is used */ static const uint32_t USE_NONE = 0; static const uint32_t USE_READ = 1 << 0; static const uint32_t USE_WRITTEN = 1 << 1; enum UsePosition { POS_BEFORE = 0, POS_HERE = 1, POS_AFTER = 2 }; } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/liveness.hpp000066400000000000000000000107001223142177000203320ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file liveness.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_LIVENESS_HPP__ #define __GBE_IR_LIVENESS_HPP__ #include "sys/map.hpp" #include "sys/set.hpp" #include "ir/register.hpp" #include "ir/function.hpp" namespace gbe { namespace ir { // Liveness is computed per function class Function; /*! To choose the iteration direction, we either look at predecessors or * successors */ enum DataFlowDirection { DF_PRED = 0, DF_SUCC = 1 }; /*! Compute liveness of each register */ class Liveness : public NonCopyable { public: Liveness(Function &fn); ~Liveness(void); /*! 
Set of variables used upwards in the block (before a definition) */ typedef set UEVar; /*! Set of variables alive at the exit of the block */ typedef set LiveOut; /*! Set of variables actually killed in each block */ typedef set VarKill; /*! Per-block info */ struct BlockInfo : public NonCopyable { BlockInfo(const BasicBlock &bb) : bb(bb) {} const BasicBlock &bb; INLINE bool inUpwardUsed(Register reg) const { return upwardUsed.contains(reg); } INLINE bool inLiveOut(Register reg) const { return liveOut.contains(reg); } INLINE bool inVarKill(Register reg) const { return varKill.contains(reg); } UEVar upwardUsed; LiveOut liveOut; VarKill varKill; }; /*! Gives for each block its BlockInfo (upward used, live out and killed * variable sets) */ typedef map Info; /*! Return the complete liveness info */ INLINE const Info &getLivenessInfo(void) const { return liveness; } /*! Return the info of the given block (asserts that the block is known) */ INLINE const BlockInfo &getBlockInfo(const BasicBlock *bb) const { auto it = liveness.find(bb); GBE_ASSERT(it != liveness.end() && it->second != NULL); return *it->second; } /*! Get the set of registers alive at the end of the block */ const LiveOut &getLiveOut(const BasicBlock *bb) const { const BlockInfo &info = this->getBlockInfo(bb); return info.liveOut; } /*! Return the function the liveness was computed on */ INLINE const Function &getFunction(void) const { return fn; } /*! Actually do something for each successor / predecessor of *all* blocks */ template void foreach(const T &functor) { // Iterate on all blocks for (const auto &pair : liveness) { BlockInfo &info = *pair.second; const BasicBlock &bb = info.bb; const BlockSet *set = NULL; if (dir == DF_SUCC) set = &bb.getSuccessorSet(); else set = &bb.getPredecessorSet(); // Iterate over all successors (or predecessors, depending on dir) for (auto other : *set) { auto otherInfo = liveness.find(other); GBE_ASSERT(otherInfo != liveness.end() && otherInfo->second != NULL); functor(info, *otherInfo->second); } } } private: /*! Store the liveness of all blocks */ Info liveness; /*!
Compute the liveness for this function */ Function &fn; /*! Initialize UEVar and VarKill per block */ void initBlock(const BasicBlock &bb); /*! Initialize UEVar and VarKill per instruction */ void initInstruction(BlockInfo &info, const Instruction &insn); /*! Now really compute LiveOut based on UEVar and VarKill */ void computeLiveOut(void); /*! Use custom allocators */ GBE_CLASS(Liveness); }; /*! Output a nice ASCII reprensation of the liveness */ std::ostream &operator<< (std::ostream &out, const Liveness &liveness); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_LIVENESS_HPP__ */ Release_v0.3/backend/src/ir/lowering.cpp000066400000000000000000000315701223142177000203330ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file lowering.cpp * \author Benjamin Segovia */ #include "ir/context.hpp" #include "ir/value.hpp" #include "ir/liveness.hpp" #include "sys/set.hpp" namespace gbe { namespace ir { /*! Small helper class to lower return instructions */ class ContextReturn : public Context { public: /*! Initialize a context dedicated to return instruction lowering */ ContextReturn(Unit &unit) : Context(unit) { this->usedLabels = GBE_NEW_NO_ARG(vector); } /*! 
Lower the return instruction to gotos for the given function */ void lower(const std::string &functionName); }; void ContextReturn::lower(const std::string &functionName) { if ((this->fn = unit.getFunction(functionName)) == NULL) return; // Append a new block at the end of the function with a return instruction: // the only one we are going to have this->bb = &this->fn->getBottomBlock(); const LabelIndex index = this->label(); this->LABEL(index); const BasicBlock *lastBlock = this->bb; this->RET(); // Now traverse all instructions and replace all returns by GOTO index fn->foreachInstruction([&](Instruction &insn) { if (insn.getParent() == lastBlock) return; // This is the last block if (insn.getOpcode() != OP_RET) return; const Instruction bra = ir::BRA(index); bra.replace(&insn); }); } void lowerReturn(Unit &unit, const std::string &functionName) { ContextReturn ctx(unit); ctx.lower(functionName); } /*! Characterizes how the argument is used (directly read, indirectly read, * written) */ enum ArgUse { ARG_DIRECT_READ = 0, ARG_INDIRECT_READ = 1, ARG_WRITTEN = 2 }; /*! Just to book keep the sequence of instructions that directly load an input * argument */ struct LoadAddImm { Instruction *load; //!< Load from the argument Instruction *add; //!< Can be NULL if we only have load(arg) Instruction *loadImm; //!< Can also be NULL uint64_t offset; //!< Offset where to load in the structure uint32_t argID; //!< Associated function argument }; /*! List of direct loads */ typedef vector LoadAddImmSeq; /*! Helper class to lower function arguments if required */ class FunctionArgumentLowerer : public Context { public: /*! Build the helper structure */ FunctionArgumentLowerer(Unit &unit); /*! Free everything we needed */ virtual ~FunctionArgumentLowerer(void); /*! Perform all function arguments substitution if needed */ void lower(const std::string &name); /*! Lower the given function argument accesses */ void lower(uint32_t argID); /*! 
Build the constant push for the function */ void buildConstantPush(void); /*! Inspect the given function argument to see how it is used. If this is * direct loads only, we also output the list of instructions used for each * load */ ArgUse getArgUse(uint32_t argID); /*! Recursively look if there is a store in the given use */ bool useStore(const ValueDef &def, set &visited); /*! Look if the pointer use only load with immediate offsets */ bool matchLoadAddImm(uint32_t argID); Liveness *liveness; //!< To compute the function graph FunctionDAG *dag; //!< Contains complete dependency information LoadAddImmSeq seq; //!< All the direct loads }; INLINE uint64_t getOffsetFromImm(const Immediate &imm) { switch (imm.type) { // bit-cast these ones case TYPE_DOUBLE: case TYPE_FLOAT: case TYPE_S64: case TYPE_U64: case TYPE_U32: case TYPE_U16: case TYPE_U8: return imm.data.u64; // sign extend these ones case TYPE_S32: return int64_t(imm.data.s32); case TYPE_S16: return int64_t(imm.data.s16); case TYPE_S8: return int64_t(imm.data.s8); case TYPE_BOOL: case TYPE_HALF: NOT_SUPPORTED; return 0; } return 0; } bool matchLoad(Instruction *insn, Instruction *add, Instruction *loadImm, uint64_t offset, uint32_t argID, LoadAddImm &loadAddImm) { const Opcode opcode = insn->getOpcode(); if (opcode == OP_LOAD) { LoadInstruction *load = cast(insn); if (load->getAddressSpace() != MEM_PRIVATE) return false; loadAddImm.load = insn; loadAddImm.add = add; loadAddImm.loadImm = loadImm; loadAddImm.offset = offset; loadAddImm.argID = argID; return true; } else return false; } FunctionArgumentLowerer::FunctionArgumentLowerer(Unit &unit) : Context(unit), liveness(NULL), dag(NULL) {} FunctionArgumentLowerer::~FunctionArgumentLowerer(void) { GBE_SAFE_DELETE(dag); GBE_SAFE_DELETE(liveness); } void FunctionArgumentLowerer::lower(const std::string &functionName) { if ((this->fn = unit.getFunction(functionName)) == NULL) return; GBE_SAFE_DELETE(dag); GBE_SAFE_DELETE(liveness); this->liveness = 
GBE_NEW(ir::Liveness, *fn); this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness); // Process all structure arguments and find all the direct loads we can // replace const uint32_t argNum = fn->argNum(); for (uint32_t argID = 0; argID < argNum; ++argID) { FunctionArgument &arg = fn->getArg(argID); if (arg.type != FunctionArgument::STRUCTURE) continue; this->lower(argID); } // Build the constant push description and remove the instruction that // therefore become useless this->buildConstantPush(); } // Remove all the given instructions from the stream (if dead) #define REMOVE_INSN(WHICH) \ for (const auto &loadAddImm : seq) { \ Instruction *WHICH = loadAddImm.WHICH; \ if (WHICH == NULL) continue; \ const UseSet &useSet = dag->getUse(WHICH, 0); \ bool isDead = true; \ for (auto use : useSet) { \ if (dead.contains(use->getInstruction()) == false) { \ isDead = false; \ break; \ } \ } \ if (isDead) { \ dead.insert(WHICH); \ WHICH->remove(); \ } \ } void FunctionArgumentLowerer::buildConstantPush(void) { if (seq.size() == 0) return; // Track instructions we remove to recursively kill them properly set dead; // The argument location we already pushed (since the same argument location // can be used several times) set inserted; for (const auto &loadAddImm : seq) { LoadInstruction *load = cast(loadAddImm.load); const uint32_t valueNum = load->getValueNum(); for (uint32_t valueID = 0; valueID < valueNum; ++valueID) { const Type type = load->getValueType(); const RegisterFamily family = getFamily(type); const uint32_t size = getFamilySize(family); const uint32_t offset = loadAddImm.offset + valueID * size; const PushLocation argLocation(*fn, loadAddImm.argID, offset); if (inserted.contains(argLocation)) continue; const Register reg = load->getValue(valueID); const Register pushed = fn->newRegister(family); // TODO the MOV instruction can be most of the time avoided if the // register is never written. 
We must however support the register // replacement in the instruction interface to be able to patch all the // instruction that uses "reg" const Instruction mov = ir::MOV(type, reg, pushed); mov.replace(load); dead.insert(load); this->appendPushedConstant(pushed, argLocation); } } // Remove all unused adds and load immediates REMOVE_INSN(add) REMOVE_INSN(loadImm) } #undef REMOVE_INSN bool FunctionArgumentLowerer::useStore(const ValueDef &def, set &visited) { const UseSet &useSet = dag->getUse(def); for (const auto &use : useSet) { const Instruction *insn = use->getInstruction(); const uint32_t srcID = use->getSrcID(); const Opcode opcode = insn->getOpcode(); if (visited.contains(insn)) continue; visited.insert(insn); if (opcode == OP_STORE && srcID == StoreInstruction::addressIndex) return true; if (insn->isMemberOf() == false && insn->isMemberOf() == false) continue; else { const uint32_t dstNum = insn->getDstNum(); for (uint32_t dstID = 0; dstID < dstNum; ++dstID) if (this->useStore(ValueDef(insn, dstID), visited) == true) return true; } } return false; } bool FunctionArgumentLowerer::matchLoadAddImm(uint32_t argID) { const FunctionArgument &arg = fn->getArg(argID); LoadAddImmSeq tmpSeq; // Inspect all uses of the function argument pointer const UseSet &useSet = dag->getUse(&arg); for (auto use : useSet) { Instruction *insn = const_cast(use->getInstruction()); const Opcode opcode = insn->getOpcode(); // load dst arg LoadAddImm loadAddImm; if (matchLoad(insn, NULL, NULL, 0, argID, loadAddImm)) { tmpSeq.push_back(loadAddImm); continue; } // add.ptr_type dst ptr other if (opcode != OP_ADD) return false; BinaryInstruction *add = cast(insn); const Type addType = add->getType(); const RegisterFamily family = getFamily(addType); if (family != unit.getPointerFamily()) return false; if (addType == TYPE_FLOAT) return false; // step 1 -> check that the other source comes from a load immediate const uint32_t srcID = use->getSrcID(); const uint32_t otherID = srcID ^ 1; const 
DefSet &defSet = dag->getDef(insn, otherID); const uint32_t defNum = defSet.size(); if (defNum == 0 || defNum > 1) continue; // undefined or more than one def const ValueDef *otherDef = *defSet.begin(); if (otherDef->getType() != ValueDef::DEF_INSN_DST) return false; Instruction *otherInsn = const_cast(otherDef->getInstruction()); if (otherInsn->getOpcode() != OP_LOADI) return false; LoadImmInstruction *loadImm = cast(otherInsn); const Immediate imm = loadImm->getImmediate(); const uint64_t offset = getOffsetFromImm(imm); // step 2 -> check that the results of the add are loads from private // memory const UseSet &addUseSet = dag->getUse(add, 0); for (auto addUse : addUseSet) { Instruction *insn = const_cast(addUse->getInstruction()); // We finally find something like load dst arg+imm LoadAddImm loadAddImm; if (matchLoad(insn, add, loadImm, offset, argID, loadAddImm)) { tmpSeq.push_back(loadAddImm); continue; } } } // OK, the argument only need direct loads. We can now append all the // direct load definitions we found for (const auto &loadImmSeq : tmpSeq) seq.push_back(loadImmSeq); return true; } ArgUse FunctionArgumentLowerer::getArgUse(uint32_t argID) { FunctionArgument &arg = fn->getArg(argID); // case 1 - we may store something to the structure argument set visited; if (this->useStore(ValueDef(&arg), visited)) return ARG_WRITTEN; // case 2 - we look for the patterns: LOAD(ptr) or LOAD(ptr+imm) if (this->matchLoadAddImm(argID)) return ARG_DIRECT_READ; // case 3 - LOAD(ptr+runtime_value) return ARG_INDIRECT_READ; } void FunctionArgumentLowerer::lower(uint32_t argID) { IF_DEBUG(const ArgUse argUse = )this->getArgUse(argID); #if GBE_DEBUG GBE_ASSERTM(argUse != ARG_WRITTEN, "TODO A store to a structure argument " "(i.e. not a char/short/int/float argument) has been found. 
" "This is not supported yet"); GBE_ASSERTM(argUse != ARG_INDIRECT_READ, "TODO Only direct loads of structure arguments are " "supported now"); #endif /* GBE_DEBUG */ } void lowerFunctionArguments(Unit &unit, const std::string &functionName) { FunctionArgumentLowerer lowerer(unit); lowerer.lower(functionName); } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/lowering.hpp000066400000000000000000000061721223142177000203400ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file lowering.hpp * \author Benjamin Segovia * Lower instructions that are not supported properly. Typical example is * handling returns or unsupported vector scatters / gathers */ #ifndef __GBE_IR_LOWERING_HPP__ #define __GBE_IR_LOWERING_HPP__ namespace gbe { namespace ir { // Structure to update class Unit; /*! Remove all return instructions and replace them to forward branches that * point to the only return instruction in a dedicated basic block and the end * of the function. * Typically this code: * * dst[x] = 1; * if (x > 4) return; * dst[x] = 3; * * will be replaced by: * * dst[x] = 1; * if (x > 4) goto end; * dst[x] = 3; * end: * return; * * There will be only one return at the end of the function. 
This return will * be simply encoded as a End-of-thread instruction (EOT) */ void lowerReturn(Unit &unit, const std::string &functionName); /*! Function arguments are a bit tricky since we must implement the proper C * semantic: we can therefore address the function arguments as we want and * we can even modify them. This leads to interesting challenges. We identify * several cases: * * case 1: * int f (__global int *dst, int x[16], int y) { * dst[get_global_id(0)] = x[16] + y; * } * Here x and y will be pushed to registers using the Curbe. No problem, we * can directly used the pushed registers * * case 2: * int f (__global int *dst, int x[16], int y) { * dst[get_global_id(0)] = x[get_local_id(0)] + y; * } * Here x is indirectly accessed. We need to perform a gather from memory. We * can simply gather it from the curbe in memory * * case 3: * int f (__global int *dst, int x[16], int y) { * x[get_local_id(0)] = y + 1; * int *ptr = get_local_id(0) % 2 ? x[0] : x[1]; * dst[get_global_id(0)] = *ptr; * } * Here we modify the function argument since it is valid C. Problem is that * we are running in SIMD mode while the data are scalar (in both memory and * registers). In that case, we just spill everything to memory (using the * stack) and reload it from here when needed. */ void lowerFunctionArguments(Unit &unit, const std::string &functionName); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_LOWERING_HPP__ */ Release_v0.3/backend/src/ir/profile.cpp000066400000000000000000000061011223142177000201350ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file profile.hpp * \author Benjamin Segovia */ #include "ir/profile.hpp" #include "ir/function.hpp" #include "sys/platform.hpp" namespace gbe { namespace ir { namespace ocl { const char *specialRegMean[] = { "local_id_0", "local_id_1", "local_id_2", "group_id_0", "group_id_1", "group_id_2", "num_groups_0", "num_groups_1", "num_groups_2", "local_size_0", "local_size_1", "local_size_2", "global_size_0", "global_size_1", "global_size_2", "global_offset_0", "global_offset_1", "global_offset_2", "stack_pointer", "block_ip", "barrier_id", "thread_number", "work_dimension", "sampler_info" }; #if GBE_DEBUG #define DECL_NEW_REG(FAMILY, REG) \ r = fn.newRegister(FAMILY_DWORD); \ GBE_ASSERT(r == REG); #else #define DECL_NEW_REG(FAMILY, REG) \ fn.newRegister(FAMILY_DWORD); #endif /* GBE_DEBUG */ static void init(Function &fn) { IF_DEBUG(Register r); DECL_NEW_REG(FAMILY_DWORD, lid0); DECL_NEW_REG(FAMILY_DWORD, lid1); DECL_NEW_REG(FAMILY_DWORD, lid2); DECL_NEW_REG(FAMILY_DWORD, groupid0); DECL_NEW_REG(FAMILY_DWORD, groupid1); DECL_NEW_REG(FAMILY_DWORD, groupid2); DECL_NEW_REG(FAMILY_DWORD, numgroup0); DECL_NEW_REG(FAMILY_DWORD, numgroup1); DECL_NEW_REG(FAMILY_DWORD, numgroup2); DECL_NEW_REG(FAMILY_DWORD, lsize0); DECL_NEW_REG(FAMILY_DWORD, lsize1); DECL_NEW_REG(FAMILY_DWORD, lsize2); DECL_NEW_REG(FAMILY_DWORD, gsize0); DECL_NEW_REG(FAMILY_DWORD, gsize1); DECL_NEW_REG(FAMILY_DWORD, gsize2); DECL_NEW_REG(FAMILY_DWORD, goffset0); DECL_NEW_REG(FAMILY_DWORD, goffset1); DECL_NEW_REG(FAMILY_DWORD, goffset2); DECL_NEW_REG(FAMILY_DWORD, stackptr); DECL_NEW_REG(FAMILY_WORD, blockip); 
DECL_NEW_REG(FAMILY_DWORD, barrierid); DECL_NEW_REG(FAMILY_DWORD, threadn); DECL_NEW_REG(FAMILY_DWORD, workdim); DECL_NEW_REG(FAMILY_WORD, samplerinfo); } #undef DECL_NEW_REG } /* namespace ocl */ void initProfile(Function &fn) { const Profile profile = fn.getProfile(); switch (profile) { case PROFILE_C: GBE_ASSERTM(false, "Unsupported profile"); break; case PROFILE_OCL: ocl::init(fn); }; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/profile.hpp000066400000000000000000000064071223142177000201530ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file profile.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_PROFILE_HPP__ #define __GBE_IR_PROFILE_HPP__ #include "ir/register.hpp" namespace gbe { namespace ir { /*! Profile is defined *per-function* and mostly predefined registers */ enum Profile : uint32_t { PROFILE_C = 0, // Not used now PROFILE_OCL = 1 }; // Will be pre-initialized based on its profile class Function; /*! 
Registers used for ocl. Each constant is the index of one special register
   *  in the register file of the function. ocl::init (profile.cpp) allocates
   *  the registers in this exact order and, in debug builds, asserts that each
   *  allocation returns the index declared here: keep both lists and regNum in
   *  sync */
  namespace ocl
  {
    static const Register lid0 = Register(0);     // get_local_id(0)
    static const Register lid1 = Register(1);     // get_local_id(1)
    static const Register lid2 = Register(2);     // get_local_id(2)
    static const Register groupid0 = Register(3); // get_group_id(0)
    static const Register groupid1 = Register(4); // get_group_id(1)
    static const Register groupid2 = Register(5); // get_group_id(2)
    static const Register numgroup0 = Register(6); // get_num_groups(0)
    static const Register numgroup1 = Register(7); // get_num_groups(1)
    static const Register numgroup2 = Register(8); // get_num_groups(2)
    static const Register lsize0 = Register(9);   // get_local_size(0)
    static const Register lsize1 = Register(10);  // get_local_size(1)
    static const Register lsize2 = Register(11);  // get_local_size(2)
    static const Register gsize0 = Register(12);  // get_global_size(0)
    static const Register gsize1 = Register(13);  // get_global_size(1)
    static const Register gsize2 = Register(14);  // get_global_size(2)
    static const Register goffset0 = Register(15); // get_global_offset(0)
    static const Register goffset1 = Register(16); // get_global_offset(1)
    static const Register goffset2 = Register(17); // get_global_offset(2)
    static const Register stackptr = Register(18); // stack pointer
    static const Register blockip = Register(19); // block instruction pointer
    static const Register barrierid = Register(20);// barrier identifier
    static const Register threadn = Register(21); // number of threads
    static const Register workdim = Register(22); // work dimension
    static const Register samplerinfo = Register(23); // sampler information
    static const uint32_t regNum = 24;            // number of special registers
    extern const char *specialRegMean[];  // printable names, indexed by register
  } /* namespace ocl */

  /*! 
Initialize the profile of the given function */ void initProfile(Function &fn); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_PROFILE_HPP__ */ Release_v0.3/backend/src/ir/register.cpp000066400000000000000000000041461223142177000203300ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file register.cpp * \author Benjamin Segovia */ #include "ir/profile.hpp" #include "ir/register.hpp" namespace gbe { namespace ir { std::ostream &operator<< (std::ostream &out, const RegisterData ®Data) { switch (regData.family) { case FAMILY_BOOL: return out << "bool"; case FAMILY_BYTE: return out << "byte"; case FAMILY_WORD: return out << "word"; case FAMILY_DWORD: return out << "dword"; case FAMILY_QWORD: return out << "qword"; }; return out; } std::ostream &operator<< (std::ostream &out, const RegisterFile &file) { out << "## " << file.regNum() << " register" << (file.regNum() ? "s" : "") << " ##" << std::endl; for (uint32_t i = 0; i < file.regNum(); ++i) { const RegisterData reg = file.get(Register(i)); out << ".decl." 
<< reg << " %" << i; if (i < ocl::regNum) out << " " << ocl::specialRegMean[i]; out << std::endl; } return out; } Tuple RegisterFile::appendArrayTuple(const Register *reg, uint32_t regNum) { const Tuple index = Tuple(regTuples.size()); for (uint32_t regID = 0; regID < regNum; ++regID) { GBE_ASSERTM(reg[regID] < this->regNum(), "Out-of-bound register"); regTuples.push_back(reg[regID]); } return index; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/register.hpp000066400000000000000000000124771223142177000203430ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file register.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_REGISTER_HPP__ #define __GBE_IR_REGISTER_HPP__ #include "sys/vector.hpp" #include "sys/platform.hpp" namespace gbe { namespace ir { /*! Defines the size of the pointers. All the functions from the unit will * use the same pointer size as the unit they belong to */ enum PointerSize { POINTER_32_BITS = 32, POINTER_64_BITS = 64 }; /*! 
Basically provides the size of the register */ enum RegisterFamily : uint8_t { FAMILY_BOOL = 0, FAMILY_BYTE = 1, FAMILY_WORD = 2, FAMILY_DWORD = 3, FAMILY_QWORD = 4 }; INLINE uint32_t getFamilySize(RegisterFamily family) { switch (family) { case FAMILY_BYTE: return 1; case FAMILY_WORD: return 2; case FAMILY_DWORD: return 4; case FAMILY_QWORD: return 8; default: NOT_SUPPORTED; }; return 0; } /*! A register can be either a byte, a word, a dword or a qword. We store this * value into a register data (which makes the register file) */ class RegisterData { public: /*! Build a register. All fields will be immutable */ INLINE RegisterData(RegisterFamily family = FAMILY_DWORD) : family(family) {} /*! Copy constructor */ INLINE RegisterData(const RegisterData &other) : family(other.family) {} /*! Copy operator */ INLINE RegisterData &operator= (const RegisterData &other) { this->family = other.family; return *this; } /*! Nothing really happens here */ INLINE ~RegisterData(void) {} RegisterFamily family; //!< Register size or if it is a flag GBE_CLASS(RegisterData); }; /*! Output the register file string in the given stream */ std::ostream &operator<< (std::ostream &out, const RegisterData ®Data); /*! Register is the position of the index of the register data in the register * file. We enforce type safety with this class */ TYPE_SAFE(Register, uint16_t) INLINE bool operator< (const Register &r0, const Register &r1) { return r0.value() < r1.value(); } /*! Tuple is the position of the first register in the tuple vector. We * enforce type safety with this class */ TYPE_SAFE(Tuple, uint16_t) /*! A register file allocates and destroys registers. Basically, we will have * one register file per function */ class RegisterFile { public: /*! 
Return the index of a newly allocated register */ INLINE Register append(RegisterFamily family) { GBE_ASSERTM(regNum() < MAX_INDEX, "Too many defined registers (only 65535 are supported)"); const uint16_t index = regNum(); const RegisterData reg(family); regs.push_back(reg); return Register(index); } /*! Make a tuple from an array of register */ Tuple appendArrayTuple(const Register *reg, uint32_t regNum); /*! Make a tuple and return the index to the first element of the tuple */ template INLINE Tuple appendTuple(First first, Rest... rest) { const Tuple index = Tuple(regTuples.size()); GBE_ASSERTM(first < regNum(), "Out-of-bound register"); regTuples.push_back(first); appendTuple(rest...); return index; } /*! To terminate variadic recursion */ INLINE void appendTuple(void) {} /*! Return a copy of the register at index */ INLINE RegisterData get(Register index) const { return regs[index]; } /*! Get the register index from the tuple */ INLINE Register get(Tuple index, uint32_t which) const { return regTuples[uint16_t(index) + which]; } /*! Set the register index from the tuple */ INLINE void set(Tuple index, uint32_t which, Register reg) { regTuples[uint16_t(index) + which] = reg; } /*! Number of registers in the register file */ INLINE uint32_t regNum(void) const { return regs.size(); } /*! Number of tuples in the register file */ INLINE uint32_t tupleNum(void) const { return regTuples.size(); } /*! register and tuple indices are short */ enum { MAX_INDEX = 0xffff }; private: vector regs; //!< All the registers together vector regTuples; //!< Tuples are used for many src / dst GBE_CLASS(RegisterFile); }; /*! Useful to encode anything special */ static const Register invalidRegister(RegisterFile::MAX_INDEX); /*! 
Output the register file string in the given stream */ std::ostream &operator<< (std::ostream &out, const RegisterFile &file); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_REGISTER_HPP__ */ Release_v0.3/backend/src/ir/sampler.cpp000066400000000000000000000122741223142177000201500ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ /** * \file sampler.cpp * */ #include "sampler.hpp" #include "context.hpp" #include "ocl_common_defines.h" namespace gbe { namespace ir { const uint32_t SamplerSet::getIdx(const Register reg) const { auto it = regMap.find(reg); GBE_ASSERT(it != regMap.end()); return it->second.slot; } void SamplerSet::appendReg(const Register reg, uint32_t key, Context *ctx) { struct SamplerRegSlot samplerSlot; samplerSlot.reg = reg; samplerSlot.slot = samplerMap.size(); samplerMap.insert(std::make_pair(key, samplerSlot)); regMap.insert(std::make_pair(samplerSlot.reg, samplerSlot)); } Register SamplerSet::append(uint32_t samplerValue, Context *ctx) { auto it = samplerMap.find(samplerValue); if (it != samplerMap.end()) return it->second.reg; // This register is just used as a key. 
Register reg = ctx->reg(FAMILY_DWORD); appendReg(reg, samplerValue, ctx); return reg; } #define SAMPLER_ID(id) ((id << __CLK_SAMPLER_ARG_BASE) | __CLK_SAMPLER_ARG_KEY_BIT) void SamplerSet::append(Register samplerReg, Context *ctx) { ir::FunctionArgument *arg = ctx->getFunction().getArg(samplerReg); GBE_ASSERT(arg != NULL); // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this // work around. arg->type = ir::FunctionArgument::SAMPLER; int32_t id = ctx->getFunction().getArgID(arg); GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS)); auto it = samplerMap.find(SAMPLER_ID(id)); if (it != samplerMap.end()) { GBE_ASSERT(it->second.reg == samplerReg); return; } appendReg(samplerReg, SAMPLER_ID(id), ctx); } #define OUT_UPDATE_SZ(elt) SERIALIZE_OUT(elt, outs, ret_size) #define IN_UPDATE_SZ(elt) DESERIALIZE_IN(elt, ins, total_size) /*! Implements the serialization. 
*/ size_t SamplerSet::serializeToBin(std::ostream& outs) { size_t ret_size = 0; OUT_UPDATE_SZ(magic_begin); OUT_UPDATE_SZ(samplerMap.size()); for (auto iter : samplerMap) { OUT_UPDATE_SZ(iter.first); OUT_UPDATE_SZ(iter.second.reg); OUT_UPDATE_SZ(iter.second.slot); } OUT_UPDATE_SZ(regMap.size()); for (auto iter : regMap) { OUT_UPDATE_SZ(iter.first); OUT_UPDATE_SZ(iter.second.reg); OUT_UPDATE_SZ(iter.second.slot); } OUT_UPDATE_SZ(magic_end); OUT_UPDATE_SZ(ret_size); return ret_size; } size_t SamplerSet::deserializeFromBin(std::istream& ins) { size_t total_size = 0; uint32_t magic; size_t sampler_map_sz = 0; IN_UPDATE_SZ(magic); if (magic != magic_begin) return 0; IN_UPDATE_SZ(sampler_map_sz); for (size_t i = 0; i < sampler_map_sz; i++) { uint32_t key; ir::SamplerRegSlot reg_slot; IN_UPDATE_SZ(key); IN_UPDATE_SZ(reg_slot.reg); IN_UPDATE_SZ(reg_slot.slot); samplerMap.insert(std::make_pair(key, reg_slot)); } IN_UPDATE_SZ(sampler_map_sz); for (size_t i = 0; i < sampler_map_sz; i++) { ir::Register key; ir::SamplerRegSlot reg_slot; IN_UPDATE_SZ(key); IN_UPDATE_SZ(reg_slot.reg); IN_UPDATE_SZ(reg_slot.slot); regMap.insert(std::make_pair(key, reg_slot)); } IN_UPDATE_SZ(magic); if (magic != magic_end) return 0; size_t total_bytes; IN_UPDATE_SZ(total_bytes); if (total_bytes + sizeof(total_size) != total_size) return 0; return total_size; } void SamplerSet::printStatus(int indent, std::ostream& outs) { using namespace std; string spaces = indent_to_str(indent); string spaces_nl = indent_to_str(indent + 4); outs << spaces << "------------ Begin SamplerSet ------------" << "\n"; outs << spaces_nl << " SamplerSet Map: [index, sampler_reg, sampler_slot]\n"; outs << spaces_nl << " samplerMap size: " << samplerMap.size() << "\n"; for (auto iter : samplerMap) { outs << spaces_nl << " [" << iter.first << ", " << iter.second.reg << ", " << iter.second.slot << "]\n"; } outs << spaces_nl << " SamplerSet Map: [reg, sampler_reg, sampler_slot]\n"; outs << spaces_nl << " regMap size: " << 
regMap.size() << "\n"; for (auto iter : regMap) { outs << spaces_nl << " [" << iter.first << ", " << iter.second.reg << ", " << iter.second.slot << "]\n"; } outs << spaces << "------------- End SamplerSet -------------" << "\n"; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/sampler.hpp000066400000000000000000000064031223142177000201520ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ /** * \file sampler.hpp * * \author Benjamin Segovia */ #ifndef __GBE_IR_SAMPLER_HPP__ #define __GBE_IR_SAMPLER_HPP__ #include "ir/register.hpp" #include "sys/map.hpp" namespace gbe { namespace ir { /*! A sampler set is a set of global samplers which are defined as constant global * sampler or defined in the outermost kernel scope variables. According to the spec * all the variable should have a initialized integer value and can't be modified. */ class Context; struct SamplerRegSlot { Register reg; uint32_t slot; }; class SamplerSet : public Serializable { public: /*! Append the specified sampler and return the allocated offset. * If the speficied sampler is exist, only return the previous offset and * don't append it again. Return -1, if failed.*/ Register append(uint32_t clkSamplerValue, Context *ctx); /*! Append a sampler defined in kernel args. */ void append(Register samplerArg, Context *ctx); /*! 
Get the sampler idx (actual location) */ const uint32_t getIdx(const Register reg) const; size_t getDataSize(void) { return samplerMap.size(); } size_t getDataSize(void) const { return samplerMap.size(); } void getData(uint32_t *samplers) const { for(auto &it : samplerMap) samplers[it.second.slot] = it.first; } void operator = (const SamplerSet& other) { regMap.insert(other.regMap.begin(), other.regMap.end()); samplerMap.insert(other.samplerMap.begin(), other.samplerMap.end()); } SamplerSet(const SamplerSet& other) : samplerMap(other.samplerMap.begin(), other.samplerMap.end()) { } SamplerSet() {} static const uint32_t magic_begin = TO_MAGIC('S', 'A', 'M', 'P'); static const uint32_t magic_end = TO_MAGIC('P', 'M', 'A', 'S'); /* format: magic_begin | samplerMap_size | element_1 | ........ | element_n | regMap_size | element_1 | ........ | element_n | magic_end | total_size */ /*! Implements the serialization. */ virtual size_t serializeToBin(std::ostream& outs); virtual size_t deserializeFromBin(std::istream& ins); virtual void printStatus(int indent, std::ostream& outs); private: void appendReg(const Register reg, uint32_t key, Context *ctx); map samplerMap; map regMap; GBE_CLASS(SamplerSet); }; } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_SAMPLER_HPP__ */ Release_v0.3/backend/src/ir/type.cpp000066400000000000000000000031151223142177000174600ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file instruction.cpp * \author Benjamin Segovia */ #include "ir/type.hpp" namespace gbe { namespace ir { std::ostream &operator<< (std::ostream &out, const Type &type) { switch (type) { case TYPE_BOOL: return out << "bool"; case TYPE_S8: return out << "int8"; case TYPE_U8: return out << "uint8"; case TYPE_S16: return out << "int16"; case TYPE_U16: return out << "uin16"; case TYPE_S32: return out << "int32"; case TYPE_U32: return out << "uin32"; case TYPE_S64: return out << "int64"; case TYPE_U64: return out << "uin64"; case TYPE_HALF: return out << "half"; case TYPE_FLOAT: return out << "float"; case TYPE_DOUBLE: return out << "double"; }; return out; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/type.hpp000066400000000000000000000053641223142177000174750ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file type.hpp * * \author Benjamin Segovia */ #ifndef __GBE_IR_TYPE_HPP__ #define __GBE_IR_TYPE_HPP__ #include "sys/platform.hpp" #include "ir/register.hpp" #include namespace gbe { namespace ir { /*! 
All types possibly supported by the instruction */ enum Type : uint8_t { TYPE_BOOL = 0, //!< boolean value TYPE_S8, //!< signed 8 bits integer TYPE_U8, //!< unsigned 8 bits integer TYPE_S16, //!< signed 16 bits integer TYPE_U16, //!< unsigned 16 bits integer TYPE_S32, //!< signed 32 bits integer TYPE_U32, //!< unsigned 32 bits integer TYPE_S64, //!< signed 64 bits integer TYPE_U64, //!< unsigned 64 bits integer TYPE_HALF, //!< 16 bits floating point value TYPE_FLOAT, //!< 32 bits floating point value TYPE_DOUBLE //!< 64 bits floating point value }; /*! Output a string for the type in the given stream */ std::ostream &operator<< (std::ostream &out, const Type &type); /*! Get the register family for each type */ INLINE RegisterFamily getFamily(Type type) { switch (type) { case TYPE_BOOL: return FAMILY_BOOL; case TYPE_S8: case TYPE_U8: return FAMILY_BYTE; case TYPE_S16: case TYPE_U16: case TYPE_HALF: return FAMILY_WORD; case TYPE_S32: case TYPE_U32: case TYPE_FLOAT: return FAMILY_DWORD; case TYPE_S64: case TYPE_U64: case TYPE_DOUBLE: return FAMILY_QWORD; }; return FAMILY_DWORD; } /*! Return a type for each register family */ INLINE Type getType(RegisterFamily family) { switch (family) { case FAMILY_BOOL: return TYPE_BOOL; case FAMILY_BYTE: return TYPE_U8; case FAMILY_WORD: return TYPE_U16; case FAMILY_DWORD: return TYPE_U32; case FAMILY_QWORD: return TYPE_U64; }; return TYPE_U32; } } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_TYPE_HPP__ */ Release_v0.3/backend/src/ir/unit.cpp000066400000000000000000000036471223142177000174700ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file unit.cpp * \author Benjamin Segovia */ #include "ir/unit.hpp" #include "ir/function.hpp" namespace gbe { namespace ir { Unit::Unit(PointerSize pointerSize) : pointerSize(pointerSize) {} Unit::~Unit(void) { for (const auto &pair : functions) GBE_DELETE(pair.second); } Function *Unit::getFunction(const std::string &name) const { auto it = functions.find(name); if (it == functions.end()) return NULL; return it->second; } Function *Unit::newFunction(const std::string &name) { auto it = functions.find(name); if (it != functions.end()) return NULL; Function *fn = GBE_NEW(Function, name, *this); functions[name] = fn; return fn; } void Unit::newConstant(const char *data, const std::string &name, uint32_t size, uint32_t alignment) { constantSet.append(data, name, size, alignment); } std::ostream &operator<< (std::ostream &out, const Unit &unit) { unit.apply([&out] (const Function &fn) { out << fn << std::endl; }); return out; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/unit.hpp000066400000000000000000000063341223142177000174710ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file unit.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_UNIT_HPP__ #define __GBE_IR_UNIT_HPP__ #include "ir/constant.hpp" #include "ir/register.hpp" #include "sys/hash_map.hpp" #include "sys/map.hpp" namespace gbe { namespace ir { // A unit contains a set of functions class Function; /*! Complete unit of compilation. It contains a set of functions and a set of * constant the functions may refer to. */ class Unit : public NonCopyable { public: typedef hash_map FunctionSet; /*! Create an empty unit */ Unit(PointerSize pointerSize = POINTER_32_BITS); /*! Release everything (*including* the function pointers) */ ~Unit(void); /*! Get the set of functions defined in the unit */ const FunctionSet &getFunctionSet(void) const { return functions; } /*! Retrieve the function by its name */ Function *getFunction(const std::string &name) const; /*! Return NULL if the function already exists */ Function *newFunction(const std::string &name); /*! Create a new constant in the constant set */ void newConstant(const char*, const std::string&, uint32_t size, uint32_t alignment); /*! Apply the given functor on all the functions */ template INLINE void apply(const T &functor) const { for (const auto &pair : functions) functor(*pair.second); } /*! Return the size of the pointers manipulated */ INLINE PointerSize getPointerSize(void) const { return pointerSize; } /*! Return the family of registers that contain pointer */ INLINE RegisterFamily getPointerFamily(void) const { if (this->getPointerSize() == POINTER_32_BITS) return FAMILY_DWORD; else return FAMILY_QWORD; } /*! Return the constant set */ ConstantSet& getConstantSet(void) { return constantSet; } /*! 
Return the constant set */ const ConstantSet& getConstantSet(void) const { return constantSet; } private: friend class ContextInterface; //!< Can free modify the unit hash_map functions; //!< All the defined functions ConstantSet constantSet; //!< All the constants defined in the unit PointerSize pointerSize; //!< Size shared by all pointers GBE_CLASS(Unit); }; /*! Output the unit string in the given stream */ std::ostream &operator<< (std::ostream &out, const Unit &unit); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_UNIT_HPP__ */ Release_v0.3/backend/src/ir/value.cpp000066400000000000000000000536071223142177000176260ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file value.cpp * \author Benjamin Segovia */ #include "ir/value.hpp" #include "ir/liveness.hpp" namespace gbe { namespace ir { /*! To build the chains (i.e. basically the graph of values), we are going to * iterate on liveout definitions: for each block and for each variable * (ir::Register) alive at the end of the block (in Block::LiveOut), we are * computing the set of all possible value definitions. 
Using these value * definitions, we will finally transfer these sets to the successors to get * the ud / du chains * * LiveOutSet contains the set of definitions for each basic block */ class LiveOutSet { public: LiveOutSet(Liveness &liveness, const FunctionDAG &dag); ~LiveOutSet(void); /*! One set per register */ typedef set RegDefSet; /*! We have one map of liveout register per block */ typedef map BlockDefMap; /*! All the block definitions map in the functions */ typedef map FunctionDefMap; /*! Performs the double look-up to get the set of defs per register */ RegDefSet &getDefSet(const BasicBlock *bb, Register reg); /*! Build a UD-chain as the union of the predecessor chains */ void makeDefSet(DefSet &udChain, const BasicBlock &bb, Register reg); /*! Fast per register definition set allocation */ DECL_POOL(RegDefSet, regDefSetPool); /*! Fast register sets allocation */ DECL_POOL(BlockDefMap, blockDefMapPool); FunctionDefMap defMap; //!< All per-block data Liveness &liveness; //!< Contains LiveOut information const FunctionDAG &dag; //!< Structure we are building private: /*! Initialize liveOut with the instruction destination values */ void initializeInstructionDef(void); /*! Initialize liveOut with the function argument, special and pushed * registers */ void initializeOtherDef(void); /*! Iterate to completely transfer the liveness and get the def sets */ void iterateLiveOut(void); /*! Use custom allocators */ GBE_CLASS(LiveOutSet); }; /*! 
Debug print of the liveout set */ std::ostream &operator<< (std::ostream &out, LiveOutSet &set); LiveOutSet::LiveOutSet(Liveness &liveness, const FunctionDAG &dag) : liveness(liveness), dag(dag) { this->initializeInstructionDef(); this->initializeOtherDef(); this->iterateLiveOut(); } LiveOutSet::RegDefSet &LiveOutSet::getDefSet(const BasicBlock *bb, Register reg) { auto bbIt = defMap.find(bb); GBE_ASSERT(bbIt != defMap.end()); auto defIt = bbIt->second->find(reg); GBE_ASSERT(defIt != bbIt->second->end() && defIt->second != NULL); return *defIt->second; } void LiveOutSet::makeDefSet(DefSet &udChain, const BasicBlock &bb, Register reg) { // Iterate over all the predecessors const auto &preds = bb.getPredecessorSet(); for (const auto &pred : preds) { RegDefSet &predDef = this->getDefSet(pred, reg); for (auto def : predDef) udChain.insert(def); } // If this is the top block we must take into account both function // arguments and special registers const Function &fn = bb.getParent(); if (fn.isEntryBlock(bb) == false) return; // Is it a function input? const FunctionArgument *arg = fn.getArg(reg); const PushLocation *pushed = fn.getPushLocation(reg); // Is it a pushed register? if (pushed != NULL) { ValueDef *def = const_cast(dag.getDefAddress(pushed)); udChain.insert(def); } // Is a function argument? else if (arg != NULL) { ValueDef *def = const_cast(dag.getDefAddress(arg)); udChain.insert(def); } // Is it a special register? 
else if (fn.isSpecialReg(reg) == true) { ValueDef *def = const_cast(dag.getDefAddress(reg)); udChain.insert(def); } } void LiveOutSet::initializeInstructionDef(void) { const Function &fn = liveness.getFunction(); // Iterate over each block and initialize the liveOut data fn.foreachBlock([&](const BasicBlock &bb) { GBE_ASSERT(defMap.find(&bb) == defMap.end()); // Allocate a map of register definitions auto blockDefMap = this->newBlockDefMap(); defMap.insert(std::make_pair(&bb, blockDefMap)); // We only consider liveout registers const auto &info = this->liveness.getBlockInfo(&bb); const auto &liveOut = info.liveOut; for (auto reg : liveOut) { GBE_ASSERT(blockDefMap->find(reg) == blockDefMap->end()); auto regDefSet = this->newRegDefSet(); blockDefMap->insert(std::make_pair(reg, regDefSet)); } // Now traverse the blocks backwards and find the definition of each // liveOut register set defined; for (auto it = --bb.end(); it != bb.end(); --it) { const Instruction &insn = *it; const uint32_t dstNum = insn.getDstNum(); for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const Register reg = insn.getDst(dstID); // We only take the most recent definition if (defined.contains(reg) == true) continue; // Not in LiveOut, so does not matter if (info.inLiveOut(reg) == false) continue; defined.insert(reg); // Insert the outgoing definition for this register auto regDefSet = blockDefMap->find(reg); ValueDef *def = const_cast(this->dag.getDefAddress(&insn, dstID)); GBE_ASSERT(regDefSet != blockDefMap->end() && def != NULL); regDefSet->second->insert(def); } } }); } void LiveOutSet::initializeOtherDef(void) { const Function &fn = liveness.getFunction(); const uint32_t argNum = fn.argNum(); // The first block must also transfer the function arguments const BasicBlock &top = fn.getTopBlock(); const Liveness::BlockInfo &info = this->liveness.getBlockInfo(&top); GBE_ASSERT(defMap.contains(&top) == true); auto blockDefMap = defMap.find(&top)->second; // Insert all the values that are not 
overwritten in the block and alive at // the end of it for (uint32_t argID = 0; argID < argNum; ++argID) { const FunctionArgument &arg = fn.getArg(argID); const Register reg = arg.reg; // Do not transfer dead values if (info.inLiveOut(reg) == false) continue; // If we overwrite it, do not transfer the initial value if (info.inVarKill(reg) == true) continue; ValueDef *def = const_cast(this->dag.getDefAddress(&arg)); auto it = blockDefMap->find(reg); GBE_ASSERT(it != blockDefMap->end()); it->second->insert(def); } // Now transfer the special registers that are not over-written const uint32_t firstID = fn.getFirstSpecialReg(); const uint32_t specialNum = fn.getSpecialRegNum(); for (uint32_t regID = firstID; regID < firstID + specialNum; ++regID) { const Register reg(regID); // Do not transfer dead values if (info.inLiveOut(reg) == false) continue; // If we overwrite it, do not transfer the initial value if (info.inVarKill(reg) == true) continue; ValueDef *def = const_cast(this->dag.getDefAddress(reg)); auto it = blockDefMap->find(reg); GBE_ASSERT(it != blockDefMap->end()); it->second->insert(def); } // Finally do the same thing with pushed registers const Function::PushMap &pushMap = fn.getPushMap(); for (const auto &pushed : pushMap) { const Register reg = pushed.first; // Do not transfer dead values if (info.inLiveOut(reg) == false) continue; // If we overwrite it, do not transfer the initial value if (info.inVarKill(reg) == true) continue; ValueDef *def = const_cast(this->dag.getDefAddress(&pushed.second)); auto it = blockDefMap->find(reg); GBE_ASSERT(it != blockDefMap->end()); it->second->insert(def); } } void LiveOutSet::iterateLiveOut(void) { bool changed = true; while (changed) { changed = false; // Compute the union of the current liveout definitions with the previous // ones. 
Do not take into account the killed values though liveness.foreach([&](Liveness::BlockInfo &curr, const Liveness::BlockInfo &pred) { const BasicBlock &bb = curr.bb; const BasicBlock &pbb = pred.bb; for (auto reg : curr.liveOut) { if (pred.inLiveOut(reg) == false) continue; if (curr.inVarKill(reg) == true) continue; RegDefSet &currSet = this->getDefSet(&bb, reg); RegDefSet &predSet = this->getDefSet(&pbb, reg); // Transfer the values for (auto def : predSet) { if (currSet.contains(def)) continue; changed = true; currSet.insert(def); } } }); } } LiveOutSet::~LiveOutSet(void) { for (const auto pair : defMap) { BlockDefMap *block = pair.second; for (auto regSet : *block) this->deleteRegDefSet(regSet.second); this->deleteBlockDefMap(block); } } std::ostream &operator<< (std::ostream &out, LiveOutSet &set) { for (const auto &pair : set.defMap) { // To recognize the block, just print its instructions out << "Block:" << std::endl; for (const auto &insn : *pair.first) out << insn << std::endl; // Iterate over all alive registers to get their definitions const LiveOutSet::BlockDefMap *defMap = pair.second; if (defMap->size() > 0) out << "LiveSet:" << std::endl; for (const auto &pair : *defMap) { const Register reg = pair.first; const LiveOutSet::RegDefSet *set = pair.second; for (auto def : *set) { const ValueDef::Type type = def->getType(); if (type == ValueDef::DEF_FN_ARG) out << "%" << reg << ": " << "function input" << std::endl; else if (type == ValueDef::DEF_FN_PUSHED) out << "%" << reg << ": " << "pushed register" << std::endl; else if (type == ValueDef::DEF_SPECIAL_REG) out << "%" << reg << ": " << "special register" << std::endl; else { const Instruction *insn = def->getInstruction(); out << "%" << reg << ": " << insn << " " << *insn << std::endl; } } } out << std::endl; } return out; } FunctionDAG::FunctionDAG(Liveness &liveness) : fn(liveness.getFunction()) { // We first start with empty chains udEmpty = this->newDefSet(); duEmpty = this->newUseSet(); // First 
create the chains and insert them in their respective maps fn.foreachInstruction([this](const Instruction &insn) { // sources == value uses const uint32_t srcNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { ValueUse *valueUse = this->newValueUse(&insn, srcID); useName.insert(std::make_pair(*valueUse, valueUse)); udGraph.insert(std::make_pair(*valueUse, udEmpty)); } // destinations == value defs const uint32_t dstNum = insn.getDstNum(); for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { ValueDef *valueDef = this->newValueDef(&insn, dstID); defName.insert(std::make_pair(*valueDef, valueDef)); duGraph.insert(std::make_pair(*valueDef, duEmpty)); } }); // Function arguments are also value definitions const uint32_t argNum = fn.argNum(); for (uint32_t argID = 0; argID < argNum; ++argID) { const FunctionArgument &arg = fn.getArg(argID); ValueDef *valueDef = this->newValueDef(&arg); defName.insert(std::make_pair(*valueDef, valueDef)); duGraph.insert(std::make_pair(*valueDef, duEmpty)); } // Special registers are also definitions const uint32_t firstID = fn.getFirstSpecialReg(); const uint32_t specialNum = fn.getSpecialRegNum(); for (uint32_t regID = firstID; regID < firstID + specialNum; ++regID) { const Register reg(regID); ValueDef *valueDef = this->newValueDef(reg); defName.insert(std::make_pair(*valueDef, valueDef)); duGraph.insert(std::make_pair(*valueDef, duEmpty)); } // Pushed registers are also definitions const Function::PushMap &pushMap = fn.getPushMap(); for (const auto &pushed : pushMap) { ValueDef *valueDef = this->newValueDef(&pushed.second); defName.insert(std::make_pair(*valueDef, valueDef)); duGraph.insert(std::make_pair(*valueDef, duEmpty)); } // We create the liveOutSet to help us transfer the definitions LiveOutSet liveOutSet(liveness, *this); // Build UD chains traversing the blocks top to bottom fn.foreachBlock([&](const BasicBlock &bb) { // Track the allocated chains to be able to reuse them map allocated; // Some chains 
may be not used (ie they are dead). We track them to be // able to deallocate them later set unused; // For each instruction build the UD chains const_cast(bb).foreach([&](const Instruction &insn) { // Instruction sources consumes definitions const uint32_t srcNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const Register src = insn.getSrc(srcID); const ValueUse use(&insn, srcID); auto ud = udGraph.find(use); GBE_ASSERT(ud != udGraph.end()); // We already allocate the ud chain for this register auto it = allocated.find(src); if (it != allocated.end()) { udGraph.erase(ud); udGraph.insert(std::make_pair(use, it->second)); if (unused.contains(it->second)) unused.erase(it->second); } // Create a new one from the predecessor chains (upward used value) else { DefSet *udChain = this->newDefSet(); liveOutSet.makeDefSet(*udChain, bb, src); allocated.insert(std::make_pair(src, udChain)); ud->second = udChain; } } // Instruction destinations create new chains const uint32_t dstNum = insn.getDstNum(); for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const Register dst = insn.getDst(dstID); ValueDef *def = const_cast(this->getDefAddress(&insn, dstID)); DefSet *udChain = this->newDefSet(); udChain->insert(def); unused.insert(udChain); // Remove the previous definition if any if (allocated.contains(dst) == true) allocated.erase(dst); allocated.insert(std::make_pair(dst, udChain)); } }); // Deallocate unused chains for (auto set : unused) this->deleteDefSet(set); }); // Build the DU chains from the UD ones fn.foreachInstruction([&](const Instruction &insn) { // For each value definition of each source, we push back this use const uint32_t srcNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { ValueUse *use = const_cast(getUseAddress(&insn, srcID)); // Find all definitions for this source const auto &defs = this->getDef(&insn, srcID); for (auto def : defs) { auto uses = duGraph.find(*def); UseSet *du = uses->second; 
GBE_ASSERT(uses != duGraph.end()); if (du == duEmpty) { duGraph.erase(*def); du = this->newUseSet(); duGraph.insert(std::make_pair(*def, du)); } du->insert(use); } } }); // Allocate the set of uses and defs per register const uint32_t regNum = fn.regNum(); for (uint32_t regID = 0; regID < regNum; ++regID) { const Register reg(regID); UseSet *useSet = GBE_NEW_NO_ARG(UseSet); DefSet *defSet = GBE_NEW_NO_ARG(DefSet); regUse.insert(std::make_pair(reg, useSet)); regDef.insert(std::make_pair(reg, defSet)); } // Fill use sets (one per register) for (auto &useSet : duGraph) { for (auto use : *useSet.second) { const Register reg = use->getRegister(); auto it = regUse.find(reg); GBE_ASSERT(it != regUse.end() && it->second != NULL); it->second->insert(use); } } // Fill def sets (one per register) for (auto &defSet : udGraph) { for (auto def : *defSet.second) { const Register reg = def->getRegister(); auto it = regDef.find(reg); GBE_ASSERT(it != regDef.end() && it->second != NULL); it->second->insert(def); } } } /*! 
Helper to deallocate objects */ #define PTR_RELEASE(TYPE, VAR) \ do { \ if (VAR && destroyed.contains(VAR) == false) { \ destroyed.insert(VAR); \ delete##TYPE(VAR); \ } \ } while (0) FunctionDAG::~FunctionDAG(void) { // We track the already destroyed pointers set destroyed; // Release the empty ud-chains and du-chains PTR_RELEASE(DefSet, udEmpty); PTR_RELEASE(UseSet, duEmpty); // We free all the ud-chains for (const auto &pair : udGraph) { auto defs = pair.second; if (destroyed.contains(defs)) continue; for (auto def : *defs) PTR_RELEASE(ValueDef, def); PTR_RELEASE(DefSet, defs); } // We free all the du-chains for (const auto &pair : duGraph) { auto uses = pair.second; if (destroyed.contains(uses)) continue; for (auto use : *uses) PTR_RELEASE(ValueUse, use); PTR_RELEASE(UseSet, uses); } // Release all the use and definition sets per register for (const auto &pair : regUse) GBE_SAFE_DELETE(pair.second); for (const auto &pair : regDef) GBE_SAFE_DELETE(pair.second); } #undef PTR_RELEASE const UseSet &FunctionDAG::getUse(const ValueDef &def) const { auto it = duGraph.find(def); GBE_ASSERT(it != duGraph.end()); return *it->second; } const UseSet &FunctionDAG::getUse(const Instruction *insn, uint32_t dstID) const { return this->getUse(ValueDef(insn, dstID)); } const UseSet &FunctionDAG::getUse(const FunctionArgument *arg) const { return this->getUse(ValueDef(arg)); } const UseSet &FunctionDAG::getUse(const Register ®) const { return this->getUse(ValueDef(reg)); } const DefSet &FunctionDAG::getDef(const ValueUse &use) const { auto it = udGraph.find(use); GBE_ASSERT(it != udGraph.end()); return *it->second; } const DefSet &FunctionDAG::getDef(const Instruction *insn, uint32_t srcID) const { return this->getDef(ValueUse(insn, srcID)); } const ValueDef *FunctionDAG::getDefAddress(const ValueDef &def) const { auto it = defName.find(def); GBE_ASSERT(it != defName.end() && it->second != NULL); return it->second; } const ValueDef *FunctionDAG::getDefAddress(const PushLocation 
*pushed) const { return this->getDefAddress(ValueDef(pushed)); } const ValueDef *FunctionDAG::getDefAddress(const Instruction *insn, uint32_t dstID) const { return this->getDefAddress(ValueDef(insn, dstID)); } const ValueDef *FunctionDAG::getDefAddress(const FunctionArgument *arg) const { return this->getDefAddress(ValueDef(arg)); } const ValueDef *FunctionDAG::getDefAddress(const Register ®) const { return this->getDefAddress(ValueDef(reg)); } const ValueUse *FunctionDAG::getUseAddress(const Instruction *insn, uint32_t srcID) const { const ValueUse use(insn, srcID); auto it = useName.find(use); GBE_ASSERT(it != useName.end() && it->second != NULL); return it->second; } std::ostream &operator<< (std::ostream &out, const FunctionDAG &dag) { const Function &fn = dag.getFunction(); // Print all uses for the definitions and all definitions for each uses fn.foreachInstruction([&](const Instruction &insn) { out << &insn << ": " << insn << std::endl; // Display the set of definition for each destination const uint32_t dstNum = insn.getDstNum(); if (dstNum > 0) out << "USES:" << std::endl; for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const Register reg = insn.getDst(dstID); const auto &uses = dag.getUse(&insn, dstID); for (auto use : uses) { const Instruction *other = use->getInstruction(); out << " %" << reg << " " << other << ": " << *other << std::endl; } } // Display the set of definitions for each source const uint32_t srcNum = insn.getSrcNum(); if (srcNum > 0) out << "DEFS:" << std::endl; for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const Register reg = insn.getSrc(srcID); const auto &defs = dag.getDef(&insn, srcID); for (auto def : defs) { if (def->getType() == ValueDef::DEF_FN_PUSHED) out << " %" << reg << " # pushed register" << std::endl; else if (def->getType() == ValueDef::DEF_FN_ARG) out << " %" << reg << " # function argument" << std::endl; else if (def->getType() == ValueDef::DEF_SPECIAL_REG) out << " %" << reg << " # special register" << 
std::endl; else { const Instruction *other = def->getInstruction(); out << " %" << reg << " " << other << ": " << *other << std::endl; } } } out << std::endl; }); return out; } } /* namespace ir */ } /* namespace gbe */ Release_v0.3/backend/src/ir/value.hpp000066400000000000000000000247351223142177000176330ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file value.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_VALUE_HPP__ #define __GBE_IR_VALUE_HPP__ #include "ir/instruction.hpp" #include "ir/function.hpp" #include "sys/set.hpp" #include "sys/map.hpp" namespace gbe { namespace ir { // Make UD-Chain and DU-Chain computations faster and easier class Liveness; /*! A value definition is a destination of an instruction or a function * argument. Since we support multiple destinations, we also add the * destination ID. */ class ValueDef { public: /*! Discriminates the kind of values */ enum Type : uint32_t { DEF_FN_ARG = 0, DEF_FN_PUSHED = 1, DEF_INSN_DST = 2, DEF_SPECIAL_REG = 3 }; /*! Build a value from an instruction destination */ explicit ValueDef(const Instruction *insn, uint32_t dstID = 0u) : type(DEF_INSN_DST) { this->data.insn = insn; this->data.dstID = dstID; } /*! 
Build a value from a function argument */ explicit ValueDef(const FunctionArgument *arg) : type(DEF_FN_ARG) { this->data.arg = arg; } /*! Build a value from a pushed register */ explicit ValueDef(const PushLocation *pushed) : type(DEF_FN_PUSHED) { this->data.pushed = pushed; } /*! Build a value from a special register */ explicit ValueDef(const Register ®) : type(DEF_SPECIAL_REG) { this->data.regID = uint32_t(reg); } /*! Get the type of the value */ INLINE Type getType(void) const { return type; } /*! Get the instruction (only if this is a instruction value) */ INLINE const Instruction *getInstruction(void) const { GBE_ASSERT(type == DEF_INSN_DST); return data.insn; } /*! Get the destination ID (only if this is a instruction value) */ INLINE uint32_t getDstID(void) const { GBE_ASSERT(type == DEF_INSN_DST); return data.dstID; } /*! Get the function input (only if this is a function argument) */ INLINE const FunctionArgument *getFunctionArgument(void) const { GBE_ASSERT(type == DEF_FN_ARG); return data.arg; } /*! Get the pushed location */ INLINE const PushLocation *getPushLocation(void) const { GBE_ASSERT(type == DEF_FN_PUSHED); return data.pushed; } /*! Get the special register */ INLINE Register getSpecialReg(void) const { GBE_ASSERT(type == DEF_SPECIAL_REG); return Register(data.regID); } /*! Retrieve the register associated to the definition */ INLINE Register getRegister(void) const { if (type == DEF_SPECIAL_REG) return Register(data.regID); else if (type == DEF_FN_ARG) return data.arg->reg; else if (type == DEF_FN_PUSHED) return data.pushed->getRegister(); else return data.insn->getDst(data.dstID); } private: /*! Instruction or function argument */ union Data { /*! Instruction destination or ... */ struct { const Instruction *insn; //getSrc(srcID); } private: const Instruction *insn; //!< Instruction where the value is used uint32_t srcID; //!< Index of the source in the instruction GBE_CLASS(ValueUse); // Use gbe allocators }; /*! 
Compare two value uses (used in maps) */ INLINE bool operator< (const ValueUse &use0, const ValueUse &use1) { const Instruction *insn0 = use0.getInstruction(); const Instruction *insn1 = use1.getInstruction(); if (insn0 != insn1) return uintptr_t(insn0) < uintptr_t(insn1); const uint32_t src0 = use0.getSrcID(); const uint32_t src1 = use1.getSrcID(); return src0 < src1; } /*! All uses of a definition */ typedef set UseSet; /*! All possible definitions for a use */ typedef set DefSet; /*! Get the chains (in both directions) for the complete program. This data * structure is unfortunately way too brutal. Using std::sets all over the * place just burns a huge amount of memory. There is work to do to decrease * the memory footprint */ class FunctionDAG : public NonCopyable { public: /*! Build the complete DU/UD graphs for the program included in liveness */ FunctionDAG(Liveness &liveness); /*! Free all the resources */ ~FunctionDAG(void); /*! Get the du-chain for the definition */ const UseSet &getUse(const ValueDef &def) const; /*! Get the du-chain for the given instruction and destination */ const UseSet &getUse(const Instruction *insn, uint32_t dstID) const; /*! Get the du-chain for the given function input */ const UseSet &getUse(const FunctionArgument *arg) const; /*! Get the du-chain for the given pushed location */ const UseSet &getUse(const PushLocation *pushed) const; /*! Get the du-chain for the given special register */ const UseSet &getUse(const Register ®) const; /*! Get the ud-chain for the given use */ const DefSet &getDef(const ValueUse &use) const; /*! Get the ud-chain for the instruction and source */ const DefSet &getDef(const Instruction *insn, uint32_t srcID) const; /*! Get the pointer to the definition *as stored in the DAG* */ const ValueDef *getDefAddress(const ValueDef &def) const; /*! Get the pointer to the definition *as stored in the DAG* */ const ValueDef *getDefAddress(const PushLocation *pushed) const; /*! 
Get the pointer to the definition *as stored in the DAG* */ const ValueDef *getDefAddress(const Instruction *insn, uint32_t dstID) const; /*! Get the pointer to the definition *as stored in the DAG* */ const ValueDef *getDefAddress(const FunctionArgument *input) const; /*! Get the pointer to the definition *as stored in the DAG* */ const ValueDef *getDefAddress(const Register ®) const; /*! Get the pointer to the use *as stored in the DAG* */ const ValueUse *getUseAddress(const Instruction *insn, uint32_t srcID) const; /*! Get the set of all uses for the register */ const UseSet *getRegUse(const Register ®) const; /*! Get the set of all definitions for the register */ const DefSet *getRegDef(const Register ®) const; /*! Get the function we have the graph for */ INLINE const Function &getFunction(void) const { return fn; } /*! The DefSet for each definition use */ typedef map UDGraph; /*! The UseSet for each definition */ typedef map DUGraph; private: UDGraph udGraph; //!< All the UD chains DUGraph duGraph; //!< All the DU chains DefSet *udEmpty; //!< Void use set UseSet *duEmpty; //!< Void def set ValueDef *undefined; //!< Undefined value map useName; //!< Get the ValueUse pointer from the value map defName; //!< Get the ValueDef pointer from the value map regUse; //!< All uses of registers map regDef; //!< All defs of registers DECL_POOL(ValueDef, valueDefPool); //!< Fast ValueDef allocation DECL_POOL(ValueUse, valueUsePool); //!< Fast ValueUse allocation DECL_POOL(DefSet, udChainPool); //!< Fast DefSet allocation DECL_POOL(UseSet, duChainPool); //!< Fast UseSet allocation const Function &fn; //!< Function we are referring to GBE_CLASS(FunctionDAG); // Use internal allocators }; /*! 
Pretty print of the function DAG */ std::ostream &operator<< (std::ostream &out, const FunctionDAG &dag); } /* namespace ir */ } /* namespace gbe */ #endif /* __GBE_IR_VALUE_HPP__ */ Release_v0.3/backend/src/llvm/000077500000000000000000000000001223142177000163335ustar00rootroot00000000000000Release_v0.3/backend/src/llvm/llvm_gen_backend.cpp000066400000000000000000003273671223142177000223330ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file llvm_gen_backend.cpp * \author Benjamin Segovia */ /* Transform the LLVM IR code into Gen IR code i.e. our temporary representation * for programs running on Gen. * * Overview * ======== * * This code is mostly inspired by the (now defunct and replaced by CppBackend) * CBackend. Basically, there are two ways to transform LLVM code into machine * code (or anything else) * - You write a complete LLVM backend by the book. LLVM proposes a lot of * useful tools to do so. This is obviously the path chosen by all CPU guys * but also by AMD and nVidia which both use the backend infrastructure to * output their own intermediate language. The good point is that you can * reuse a lot of tools (like proper PHI elimination with phi congruence and * global copy propagation a la Chaitin). Bad points are: * 1/ It is a *long* journey to generate anything. 
*    2/ More importantly, the code is hugely biased towards CPUs. Typically,
 *       the way registers are defined does not fit the Gen register file well
 *       (it is really more like a regular piece of memory). The same issue
 *       applies to predicated instructions with masks, which are cumbersome
 *       to use with SSA. Indeed, since DAGSelection still manipulates SSA
 *       values, anything predicated requires inserting extra sources
 *  - You write function passes to do the translation yourself. Obviously, you
 *    reinvent the wheel. However, it is easy to do and easier to maintain
 *    (somehow)
 *
 * So, the code here just traverses LLVM asm and generates our own ISA. The
 * generated code is OK even if a global copy propagation pass is still overdue.
 * Right now, it is pretty straightforward and simplistic in that regard
 *
 * About Clang and the ABI / target
 * ================================
 *
 * A major question is: how did we actually generate this LLVM code from OpenCL?
 * Well, the thing is that there is no generic target in LLVM since there are
 * many dependencies on endianness or ABIs. Fortunately, the ptx (and nvptx for
 * LLVM 3.2) profile is pretty well adapted to our needs since NV and Gen GPUs
 * are kind of similar, or at least they are similar enough to share the same
 * front end.
 *
 * Problems
 * ========
 *
 * - Several things regarding constants like ConstantExpr are not properly handled.
 * - ptx front end generates function calls. Since we do not support them yet,
 *   the user needs to force the inlining of all functions.
If a function call * is intercepted, we just abort */ #include "llvm/Config/config.h" #if LLVM_VERSION_MINOR <= 2 #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #else #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/IR/Instructions.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/Pass.h" #include "llvm/PassManager.h" #if LLVM_VERSION_MINOR <= 2 #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include "llvm/InlineAsm.h" #else #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/InlineAsm.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantsScanner.h" #include "llvm/Analysis/FindUsedTypes.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/Target/Mangler.h" #include "llvm/Transforms/Scalar.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1) #include "llvm/Target/TargetData.h" #elif LLVM_VERSION_MINOR == 2 #include "llvm/DataLayout.h" #else #include "llvm/IR/DataLayout.h" #endif #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2) #include "llvm/Support/InstVisitor.h" #else #include "llvm/InstVisitor.h" #endif #include "llvm/Support/MathExtras.h" 
#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Host.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Config/config.h" #include "llvm/llvm_gen_backend.hpp" #include "ir/context.hpp" #include "ir/unit.hpp" #include "ir/liveness.hpp" #include "sys/set.hpp" #include "sys/cvar.hpp" /* Not defined for LLVM 3.0 */ #if !defined(LLVM_VERSION_MAJOR) #define LLVM_VERSION_MAJOR 3 #endif /* !defined(LLVM_VERSION_MAJOR) */ #if !defined(LLVM_VERSION_MINOR) #define LLVM_VERSION_MINOR 0 #endif /* !defined(LLVM_VERSION_MINOR) */ #if (LLVM_VERSION_MAJOR != 3) || (LLVM_VERSION_MINOR > 4) #error "Only LLVM 3.0 - 3.4 is supported" #endif /* (LLVM_VERSION_MAJOR != 3) || (LLVM_VERSION_MINOR > 4) */ using namespace llvm; namespace gbe { /*! Gen IR manipulates only scalar types */ static bool isScalarType(const Type *type) { return type->isFloatTy() || type->isIntegerTy() || type->isDoubleTy() || type->isPointerTy(); } /*! LLVM IR Type to Gen IR type translation */ static ir::Type getType(const ir::Context &ctx, const Type *type) { GBE_ASSERT(isScalarType(type)); if (type->isFloatTy() == true) return ir::TYPE_FLOAT; if (type->isDoubleTy() == true) return ir::TYPE_DOUBLE; if (type->isPointerTy() == true) { if (ctx.getPointerSize() == ir::POINTER_32_BITS) return ir::TYPE_U32; else return ir::TYPE_U64; } GBE_ASSERT(type->isIntegerTy() == true); if (type == Type::getInt1Ty(type->getContext())) return ir::TYPE_BOOL; if (type == Type::getInt8Ty(type->getContext())) return ir::TYPE_S8; if (type == Type::getInt16Ty(type->getContext())) return ir::TYPE_S16; if (type == Type::getInt32Ty(type->getContext())) return ir::TYPE_S32; if (type == Type::getInt64Ty(type->getContext())) return ir::TYPE_S64; GBE_ASSERT(0); return ir::TYPE_S64; } /*! 
LLVM IR Type to Gen IR unsigned type translation */ static ir::Type getUnsignedType(const ir::Context &ctx, const Type *type) { GBE_ASSERT(type->isIntegerTy() == true); if (type == Type::getInt1Ty(type->getContext())) return ir::TYPE_BOOL; if (type == Type::getInt8Ty(type->getContext())) return ir::TYPE_U8; if (type == Type::getInt16Ty(type->getContext())) return ir::TYPE_U16; if (type == Type::getInt32Ty(type->getContext())) return ir::TYPE_U32; if (type == Type::getInt64Ty(type->getContext())) return ir::TYPE_U64; GBE_ASSERT(0); return ir::TYPE_U64; } /*! Type to register family translation */ static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type) { GBE_ASSERT(isScalarType(type) == true); if (type == Type::getInt1Ty(type->getContext())) return ir::FAMILY_BOOL; if (type == Type::getInt8Ty(type->getContext())) return ir::FAMILY_BYTE; if (type == Type::getInt16Ty(type->getContext())) return ir::FAMILY_WORD; if (type == Type::getInt32Ty(type->getContext()) || type->isFloatTy()) return ir::FAMILY_DWORD; if (type == Type::getInt64Ty(type->getContext()) || type->isDoubleTy()) return ir::FAMILY_QWORD; if (type->isPointerTy()) return ctx.getPointerFamily(); GBE_ASSERT(0); return ir::FAMILY_BOOL; } /*! Get number of element to process dealing either with a vector or a scalar * value */ static ir::Type getVectorInfo(const ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum, bool useUnsigned = false) { ir::Type type; if (llvmType->isVectorTy() == true) { VectorType *vectorType = cast(llvmType); Type *elementType = vectorType->getElementType(); elemNum = vectorType->getNumElements(); if (useUnsigned) type = getUnsignedType(ctx, elementType); else type = getType(ctx, elementType); } else { elemNum = 1; if (useUnsigned) type = getUnsignedType(ctx, llvmType); else type = getType(ctx, llvmType); } return type; } /*! 
OCL to Gen-IR address type */ static INLINE ir::AddressSpace addressSpaceLLVMToGen(unsigned llvmMemSpace) { switch (llvmMemSpace) { case 0: return ir::MEM_PRIVATE; case 1: return ir::MEM_GLOBAL; case 2: return ir::MEM_CONSTANT; case 3: return ir::MEM_LOCAL; case 4: return ir::IMAGE; } GBE_ASSERT(false); return ir::MEM_GLOBAL; } static Constant *extractConstantElem(Constant *CPV, uint32_t index) { ConstantVector *CV = dyn_cast(CPV); GBE_ASSERT(CV != NULL); #if GBE_DEBUG const uint32_t elemNum = CV->getNumOperands(); GBE_ASSERTM(index < elemNum, "Out-of-bound constant vector access"); #endif /* GBE_DEBUG */ CPV = cast(CV->getOperand(index)); return CPV; } /*! Handle the LLVM IR Value to Gen IR register translation. This has 2 roles: * - Split the LLVM vector into several scalar values * - Handle the transparent copies (bitcast or use of intrincics functions * like get_local_id / get_global_id */ class RegisterTranslator { public: /*! Indices will be zero for scalar values */ typedef std::pair ValueIndex; RegisterTranslator(ir::Context &ctx) : ctx(ctx) {} /*! Empty the maps */ void clear(void) { valueMap.clear(); scalarMap.clear(); } /*! Some values will not be allocated. For example, a bit-cast destination * like: %fake = bitcast %real or a vector insertion since we do not have * vectors in Gen-IR */ void newValueProxy(Value *real, Value *fake, uint32_t realIndex = 0u, uint32_t fakeIndex = 0u) { const ValueIndex key(fake, fakeIndex); const ValueIndex value(real, realIndex); GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice valueMap[key] = value; } /*! Mostly used for the preallocated registers (lids, gids) */ void newScalarProxy(ir::Register reg, Value *value, uint32_t index = 0u) { const ValueIndex key(value, index); GBE_ASSERT(scalarMap.find(key) == scalarMap.end()); scalarMap[key] = reg; } /*! 
Allocate a new scalar register */ ir::Register newScalar(Value *value, Value *key = NULL, uint32_t index = 0u) { // we don't allow normal constant, but GlobalValue is a special case, // it needs a register to store its address GBE_ASSERT(! (isa(value) && !isa(value))); Type *type = value->getType(); auto typeID = type->getTypeID(); switch (typeID) { case Type::IntegerTyID: case Type::FloatTyID: case Type::DoubleTyID: case Type::PointerTyID: GBE_ASSERT(index == 0); return this->newScalar(value, key, type, index); break; case Type::VectorTyID: { auto vectorType = cast(type); auto elementType = vectorType->getElementType(); auto elementTypeID = elementType->getTypeID(); if (elementTypeID != Type::IntegerTyID && elementTypeID != Type::FloatTyID && elementTypeID != Type::DoubleTyID) GBE_ASSERTM(false, "Vectors of elements are not supported"); return this->newScalar(value, key, elementType, index); break; } default: NOT_SUPPORTED; }; return ir::Register(); } /*! iterating in the value map to get the final real register */ void getRealValue(Value* &value, uint32_t& index) { auto end = valueMap.end(); for (;;) { auto it = valueMap.find(std::make_pair(value, index)); if (it == end) break; else { value = it->second.first; index = it->second.second; } } } /*! Get the register from the given value at given index possibly iterating * in the value map to get the final real register */ ir::Register getScalar(Value *value, uint32_t index = 0u) { getRealValue(value, index); const auto key = std::make_pair(value, index); GBE_ASSERT(scalarMap.find(key) != scalarMap.end()); return scalarMap[key]; } /*! Insert a given register at given Value position */ void insertRegister(const ir::Register ®, Value *value, uint32_t index) { const auto key = std::make_pair(value, index); GBE_ASSERT(scalarMap.find(key) == scalarMap.end()); scalarMap[key] = reg; } /*! Says if the value exists. 
Otherwise, it is undefined */ bool valueExists(Value *value, uint32_t index) { getRealValue(value, index); const auto key = std::make_pair(value, index); return scalarMap.find(key) != scalarMap.end(); } /*! if it's a undef const value, return true. Otherwise, return false. */ bool isUndefConst(Value *value, uint32_t index) { getRealValue(value, index); Constant *CPV = dyn_cast(value); if(CPV && dyn_cast(CPV)) CPV = extractConstantElem(CPV, index); return (CPV && (isa(CPV))); } private: /*! This creates a scalar register for a Value (index is the vector index when * the value is a vector of scalars) */ ir::Register newScalar(Value *value, Value *key, Type *type, uint32_t index) { const ir::RegisterFamily family = getFamily(ctx, type); const ir::Register reg = ctx.reg(family); key = key == NULL ? value : key; this->insertRegister(reg, key, index); return reg; } /*! Map value to ir::Register */ map scalarMap; /*! Map values to values when this is only a translation (eq bitcast) */ map valueMap; /*! Actually allocates the registers */ ir::Context &ctx; }; /*! Translate LLVM IR code to Gen IR code */ class GenWriter : public FunctionPass, public InstVisitor { /*! Unit to compute */ ir::Unit &unit; /*! Helper structure to compute the unit */ ir::Context ctx; /*! Make the LLVM-to-Gen translation */ RegisterTranslator regTranslator; /*! Map target basic block to its ir::LabelIndex */ map labelMap; /*! Condition inversion can simplify branch code. We store here all the * compare instructions we need to invert to decrease branch complexity */ set conditionSet; /*! We visit each function twice. 
Once to allocate the registers and once to * emit the Gen IR instructions */ enum Pass { PASS_EMIT_REGISTERS = 0, PASS_EMIT_INSTRUCTIONS = 1 } pass; LoopInfo *LI; const Module *TheModule; public: static char ID; explicit GenWriter(ir::Unit &unit) : FunctionPass(ID), unit(unit), ctx(unit), regTranslator(ctx), LI(0), TheModule(0) { initializeLoopInfoPass(*PassRegistry::getPassRegistry()); pass = PASS_EMIT_REGISTERS; } virtual const char *getPassName() const { return "Gen Back-End"; } void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.setPreservesAll(); } virtual bool doInitialization(Module &M); /*! helper function for parsing global constant data */ void getConstantData(const Constant * c, void* mem, uint32_t& offset) const; void collectGlobalConstant(void) const; bool runOnFunction(Function &F) { // Do not codegen any 'available_externally' functions at all, they have // definitions outside the translation unit. if (F.hasAvailableExternallyLinkage()) return false; // As we inline all function calls, so skip non-kernel functions bool bKernel = isKernelFunction(F); if(!bKernel) return false; LI = &getAnalysis(); emitFunction(F); return false; } virtual bool doFinalization(Module &M) { return false; } /*! handle global variable register allocation (local, constant space) */ void allocateGlobalVariableRegister(Function &F); /*! Emit the complete function code and declaration */ void emitFunction(Function &F); /*! Handle input and output function parameters */ void emitFunctionPrototype(Function &F); /*! Emit the code for a basic block */ void emitBasicBlock(BasicBlock *BB); /*! Each block end may require to emit MOVs for further PHIs */ void emitMovForPHI(BasicBlock *curr, BasicBlock *succ); /*! Alocate one or several registers (if vector) for the value */ INLINE void newRegister(Value *value, Value *key = NULL); /*! get the register for a llvm::Constant */ ir::Register getConstantRegister(Constant *c, uint32_t index = 0); /*! 
Return a valid register from an operand (can use LOADI to make one) */ INLINE ir::Register getRegister(Value *value, uint32_t index = 0); /*! Create a new immediate from a constant */ ir::ImmediateIndex newImmediate(Constant *CPV, uint32_t index = 0); /*! Insert a new label index when this is a scalar value */ INLINE void newLabelIndex(const BasicBlock *bb); /*! Inspect the terminator instruction and try to see if we should invert * the value to simplify the code */ INLINE void simplifyTerminator(BasicBlock *bb); /*! Helper function to emit loads and stores */ template void emitLoadOrStore(T &I); /*! Will try to remove MOVs due to PHI resolution */ void removeMOVs(const ir::Liveness &liveness, ir::Function &fn); /*! Will try to remove redundants LOADI in basic blocks */ void removeLOADIs(const ir::Liveness &liveness, ir::Function &fn); /*! To avoid lost copy, we need two values for PHI. This function create a * fake value for the copy (basically ptr+1) */ INLINE Value *getPHICopy(Value *PHI); // Currently supported instructions #define DECL_VISIT_FN(NAME, TYPE) \ void regAllocate##NAME(TYPE &I); \ void emit##NAME(TYPE &I); \ void visit##NAME(TYPE &I) { \ if (pass == PASS_EMIT_INSTRUCTIONS) \ emit##NAME(I); \ else \ regAllocate##NAME(I); \ } DECL_VISIT_FN(BinaryOperator, Instruction); DECL_VISIT_FN(CastInst, CastInst); DECL_VISIT_FN(ReturnInst, ReturnInst); DECL_VISIT_FN(LoadInst, LoadInst); DECL_VISIT_FN(StoreInst, StoreInst); DECL_VISIT_FN(CallInst, CallInst); DECL_VISIT_FN(ICmpInst, ICmpInst); DECL_VISIT_FN(FCmpInst, FCmpInst); DECL_VISIT_FN(InsertElement, InsertElementInst); DECL_VISIT_FN(ExtractElement, ExtractElementInst); DECL_VISIT_FN(ShuffleVectorInst, ShuffleVectorInst); DECL_VISIT_FN(SelectInst, SelectInst); DECL_VISIT_FN(BranchInst, BranchInst); DECL_VISIT_FN(PHINode, PHINode); DECL_VISIT_FN(AllocaInst, AllocaInst); #undef DECL_VISIT_FN // Emit unary instructions from gen native function void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode 
opcode); // Emit unary instructions from gen native function void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode); ir::Register appendSampler(CallSite::arg_iterator AI); // These instructions are not supported at all void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;} void visitSwitchInst(SwitchInst &I) {NOT_SUPPORTED;} void visitInvokeInst(InvokeInst &I) {NOT_SUPPORTED;} #if LLVM_VERSION_MINOR == 0 void visitUnwindInst(UnwindInst &I) {NOT_SUPPORTED;} #endif /* __LLVM_30__ */ void visitResumeInst(ResumeInst &I) {NOT_SUPPORTED;} void visitInlineAsm(CallInst &I) {NOT_SUPPORTED;} void visitIndirectBrInst(IndirectBrInst &I) {NOT_SUPPORTED;} void visitUnreachableInst(UnreachableInst &I) {NOT_SUPPORTED;} void visitGetElementPtrInst(GetElementPtrInst &I) {NOT_SUPPORTED;} void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;} void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;} template void visitLoadOrStore(T &I); void visitInstruction(Instruction &I) {NOT_SUPPORTED;} }; char GenWriter::ID = 0; void getSequentialData(const ConstantDataSequential *cda, void *ptr, uint32_t &offset) { StringRef data = cda->getRawDataValues(); memcpy((char*)ptr+offset, data.data(), data.size()); offset += data.size(); return; } void GenWriter::getConstantData(const Constant * c, void* mem, uint32_t& offset) const { Type * type = c->getType(); Type::TypeID id = type->getTypeID(); GBE_ASSERT(c); if(isa(c)) { uint32_t size = getTypeByteSize(unit, type); offset += size; return; } else if(isa(c)) { uint32_t size = getTypeByteSize(unit, type); memset((char*)mem+offset, 0, size); offset += size; return; } switch(id) { case Type::TypeID::StructTyID: { const StructType * strTy = cast(c->getType()); uint32_t size = 0; for(uint32_t op=0; op < strTy->getNumElements(); op++) { Type* elementType = strTy->getElementType(op); uint32_t align = 8 * getAlignmentByte(unit, elementType); uint32_t padding = getPadding(size, align); size += padding; size += getTypeBitSize(unit, 
elementType); offset += padding/8; const Constant* sub = cast(c->getOperand(op)); GBE_ASSERT(sub); getConstantData(sub, mem, offset); } break; } case Type::TypeID::ArrayTyID: { const ConstantDataSequential *cds = dyn_cast(c); if(cds) getSequentialData(cds, mem, offset); else { const ConstantArray *ca = dyn_cast(c); const ArrayType *arrTy = ca->getType(); Type* elemTy = arrTy->getElementType(); uint32_t elemSize = getTypeBitSize(unit, elemTy); uint32_t padding = getPadding(elemSize, 8 * getAlignmentByte(unit, elemTy)); padding /= 8; uint32_t ops = c->getNumOperands(); for(uint32_t op = 0; op < ops; ++op) { Constant * ca = dyn_cast(c->getOperand(op)); getConstantData(ca, mem, offset); offset += padding; } } break; } case Type::TypeID::VectorTyID: { const ConstantDataSequential *cds = dyn_cast(c); GBE_ASSERT(cds); getSequentialData(cds, mem, offset); break; } case Type::TypeID::IntegerTyID: { const ConstantInt *ci = dyn_cast(c); uint32_t size = ci->getBitWidth() / 8; uint64_t data = ci->isNegative() ? 
ci->getSExtValue() : ci->getZExtValue(); memcpy((char*)mem+offset, &data, size); offset += size; break; } case Type::TypeID::FloatTyID: { const ConstantFP *cf = dyn_cast(c); *(float *)((char*)mem + offset) = cf->getValueAPF().convertToFloat(); offset += sizeof(float); break; } case Type::TypeID::DoubleTyID: { const ConstantFP *cf = dyn_cast(c); *(double *)((char*)mem + offset) = cf->getValueAPF().convertToDouble(); offset += sizeof(double); break; } default: NOT_IMPLEMENTED; } } void GenWriter::collectGlobalConstant(void) const { const Module::GlobalListType &globalList = TheModule->getGlobalList(); for(auto i = globalList.begin(); i != globalList.end(); i ++) { const GlobalVariable &v = *i; const char *name = v.getName().data(); unsigned addrSpace = v.getType()->getAddressSpace(); if(addrSpace == ir::AddressSpace::MEM_CONSTANT) { GBE_ASSERT(v.hasInitializer()); const Constant *c = v.getInitializer(); Type * type = c->getType(); uint32_t size = getTypeByteSize(unit, type); void* mem = malloc(size); uint32_t offset = 0; getConstantData(c, mem, offset); unit.newConstant((char *)mem, name, size, sizeof(unsigned)); free(mem); } } } bool GenWriter::doInitialization(Module &M) { FunctionPass::doInitialization(M); // Initialize TheModule = &M; collectGlobalConstant(); return false; } template static U processConstant(Constant *CPV, T doIt, uint32_t index = 0u) { #if GBE_DEBUG GBE_ASSERTM(dyn_cast(CPV) == NULL, "Unsupported constant expression"); if (isa(CPV) && CPV->getType()->isSingleValueType()) GBE_ASSERTM(false, "Unsupported constant expression"); #endif /* GBE_DEBUG */ #if LLVM_VERSION_MINOR > 0 ConstantDataSequential *seq = dyn_cast(CPV); if (seq) { Type *Ty = seq->getElementType(); if (Ty == Type::getInt1Ty(CPV->getContext())) { const uint64_t u64 = seq->getElementAsInteger(index); return doIt(bool(u64)); } else if (Ty == Type::getInt8Ty(CPV->getContext())) { const uint64_t u64 = seq->getElementAsInteger(index); return doIt(uint8_t(u64)); } else if (Ty == 
Type::getInt16Ty(CPV->getContext())) { const uint64_t u64 = seq->getElementAsInteger(index); return doIt(uint16_t(u64)); } else if (Ty == Type::getInt32Ty(CPV->getContext())) { const uint64_t u64 = seq->getElementAsInteger(index); return doIt(uint32_t(u64)); } else if (Ty == Type::getInt64Ty(CPV->getContext())) { const uint64_t u64 = seq->getElementAsInteger(index); return doIt(u64); } else if (Ty == Type::getFloatTy(CPV->getContext())) { const float f32 = seq->getElementAsFloat(index); return doIt(f32); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { const float f64 = seq->getElementAsDouble(index); return doIt(f64); } } else #endif /* LLVM_VERSION_MINOR > 0 */ if (dyn_cast(CPV)) { Type* Ty = CPV->getType(); if(Ty->isVectorTy()) Ty = (cast(Ty))->getElementType(); if (Ty == Type::getInt1Ty(CPV->getContext())) { const bool b = 0; return doIt(b); } else if (Ty == Type::getInt8Ty(CPV->getContext())) { const uint8_t u8 = 0; return doIt(u8); } else if (Ty == Type::getInt16Ty(CPV->getContext())) { const uint16_t u16 = 0; return doIt(u16); } else if (Ty == Type::getInt32Ty(CPV->getContext())) { const uint32_t u32 = 0; return doIt(u32); } else if (Ty == Type::getInt64Ty(CPV->getContext())) { const uint64_t u64 = 0; return doIt(u64); } else if (Ty == Type::getFloatTy(CPV->getContext())) { const float f32 = 0; return doIt(f32); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { const float f64 = 0; return doIt(f64); } else { GBE_ASSERTM(false, "Unsupporte aggregate zero type."); return doIt(uint32_t(0)); } } else { if (dyn_cast(CPV)) CPV = extractConstantElem(CPV, index); GBE_ASSERTM(dyn_cast(CPV) == NULL, "Unsupported constant expression"); // Integers if (ConstantInt *CI = dyn_cast(CPV)) { Type* Ty = CI->getType(); if (Ty == Type::getInt1Ty(CPV->getContext())) { const bool b = CI->getZExtValue(); return doIt(b); } else if (Ty == Type::getInt8Ty(CPV->getContext())) { const uint8_t u8 = CI->getZExtValue(); return doIt(u8); } else if (Ty == 
Type::getInt16Ty(CPV->getContext())) { const uint16_t u16 = CI->getZExtValue(); return doIt(u16); } else if (Ty == Type::getInt32Ty(CPV->getContext())) { const uint32_t u32 = CI->getZExtValue(); return doIt(u32); } else if (Ty == Type::getInt64Ty(CPV->getContext())) { const uint64_t u64 = CI->getZExtValue(); return doIt(u64); } else { GBE_ASSERTM(false, "Unsupported integer size"); return doIt(uint64_t(0)); } } // NULL pointers if(isa(CPV)) { return doIt(uint32_t(0)); } // Floats and doubles const Type::TypeID typeID = CPV->getType()->getTypeID(); switch (typeID) { case Type::FloatTyID: case Type::DoubleTyID: { ConstantFP *FPC = cast(CPV); GBE_ASSERT(isa(CPV) == false); if (FPC->getType() == Type::getFloatTy(CPV->getContext())) { const float f32 = FPC->getValueAPF().convertToFloat(); return doIt(f32); } else { const double f64 = FPC->getValueAPF().convertToDouble(); return doIt(f64); } } break; default: GBE_ASSERTM(false, "Unsupported constant type"); break; } } GBE_ASSERTM(false, "Unsupported constant type"); return doIt(uint64_t(0)); } /*! Pfff. I cannot use a lambda, since it is templated. 
Congratulation c++ */ struct NewImmediateFunctor { NewImmediateFunctor(ir::Context &ctx) : ctx(ctx) {} template ir::ImmediateIndex operator() (const T &t) { return ctx.newImmediate(t); } ir::Context &ctx; }; ir::ImmediateIndex GenWriter::newImmediate(Constant *CPV, uint32_t index) { return processConstant(CPV, NewImmediateFunctor(ctx), index); } void GenWriter::newRegister(Value *value, Value *key) { auto type = value->getType(); auto typeID = type->getTypeID(); switch (typeID) { case Type::IntegerTyID: case Type::FloatTyID: case Type::DoubleTyID: case Type::PointerTyID: regTranslator.newScalar(value, key); break; case Type::VectorTyID: { auto vectorType = cast(type); const uint32_t elemNum = vectorType->getNumElements(); for (uint32_t elemID = 0; elemID < elemNum; ++elemID) regTranslator.newScalar(value, key, elemID); break; } default: NOT_SUPPORTED; }; } ir::Register GenWriter::getConstantRegister(Constant *c, uint32_t elemID) { GBE_ASSERT(c != NULL); if(isa(c)) { return regTranslator.getScalar(c, elemID); } if(isa(c)) { ConstantExpr * ce = dyn_cast(c); if(ce->isCast()) { Value* op = ce->getOperand(0); ir::Register pointer_reg; if(isa(op)) { // try to get the real pointer register, for case like: // store i64 ptrtoint (i8 addrspace(3)* getelementptr inbounds ... // in which ptrtoint and getelementptr are ConstantExpr. 
pointer_reg = getConstantRegister(dyn_cast(op), elemID); } else { pointer_reg = regTranslator.getScalar(op, elemID); } // if ptrToInt request another type other than 32bit, convert as requested ir::Type dstType = getType(ctx, ce->getType()); if(ce->getOpcode() == Instruction::PtrToInt && ir::TYPE_S32 != dstType) { ir::Register tmp = ctx.reg(getFamily(dstType)); ctx.CVT(dstType, ir::TYPE_S32, tmp, pointer_reg); return tmp; } return pointer_reg; } else { uint32_t TypeIndex; uint32_t constantOffset = 0; uint32_t offset = 0; // currently only GetElementPtr is handled GBE_ASSERT(ce->getOpcode() == Instruction::GetElementPtr); Value *pointer = ce->getOperand(0); CompositeType* CompTy = cast(pointer->getType()); for(uint32_t op=1; opgetNumOperands(); ++op) { ConstantInt* ConstOP = dyn_cast(ce->getOperand(op)); GBE_ASSERT(ConstOP); TypeIndex = ConstOP->getZExtValue(); for(uint32_t ty_i=0; ty_igetTypeAtIndex(ty_i); uint32_t align = getAlignmentByte(unit, elementType); offset += getPadding(offset, align); offset += getTypeByteSize(unit, elementType); } const uint32_t align = getAlignmentByte(unit, CompTy->getTypeAtIndex(TypeIndex)); offset += getPadding(offset, align); constantOffset += offset; CompTy = dyn_cast(CompTy->getTypeAtIndex(TypeIndex)); } ir::Register pointer_reg; pointer_reg = regTranslator.getScalar(pointer, elemID); ir::Register offset_reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); ctx.LOADI(ir::Type::TYPE_S32, offset_reg, ctx.newIntegerImmediate(constantOffset, ir::Type::TYPE_S32)); ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); ctx.ADD(ir::Type::TYPE_S32, reg, pointer_reg, offset_reg); return reg; } } const ir::ImmediateIndex immIndex = this->newImmediate(c, elemID); const ir::Immediate imm = ctx.getImmediate(immIndex); const ir::Register reg = ctx.reg(getFamily(imm.type)); ctx.LOADI(imm.type, reg, immIndex); return reg; } ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) { //the real value may be constant, so get real value 
before constant check regTranslator.getRealValue(value, elemID); if(isa(value)) { Constant *c = dyn_cast(value); return getConstantRegister(c, elemID); } else return regTranslator.getScalar(value, elemID); } INLINE Value *GenWriter::getPHICopy(Value *PHI) { const uintptr_t ptr = (uintptr_t) PHI; return (Value*) (ptr+1); } void GenWriter::newLabelIndex(const BasicBlock *bb) { if (labelMap.find(bb) == labelMap.end()) { const ir::LabelIndex label = ctx.label(); labelMap[bb] = label; } } void GenWriter::simplifyTerminator(BasicBlock *bb) { Value *value = --bb->end(); BranchInst *I = NULL; if ((I = dyn_cast(value)) != NULL) { if (I->isConditional() == false) return; // If the "taken" successor is the next block, we try to invert the // branch. BasicBlock *succ = I->getSuccessor(0); if (llvm::next(Function::iterator(bb)) != Function::iterator(succ)) return; // More than one use is too complicated: we skip it Value *condition = I->getCondition(); if (condition->hasOneUse() == false) return; // Right now, we only invert comparison instruction ICmpInst *CI = dyn_cast(condition); if (CI != NULL) { GBE_ASSERT(conditionSet.find(CI) == conditionSet.end()); conditionSet.insert(CI); return; } } } void GenWriter::emitBasicBlock(BasicBlock *BB) { GBE_ASSERT(labelMap.find(BB) != labelMap.end()); ctx.LABEL(labelMap[BB]); for (auto II = BB->begin(), E = BB->end(); II != E; ++II) visit(*II); } void GenWriter::emitMovForPHI(BasicBlock *curr, BasicBlock *succ) { for (BasicBlock::iterator I = succ->begin(); isa(I); ++I) { PHINode *PN = cast(I); Value *IV = PN->getIncomingValueForBlock(curr); if (!isa(IV)) { Type *llvmType = PN->getType(); GBE_ASSERTM(llvmType != Type::getInt1Ty(llvmType->getContext()), "TODO Boolean values cannot escape their definition basic block"); const ir::Type type = getType(ctx, llvmType); // Emit the MOV required by the PHI function. We do it simple and do not // try to optimize them. 
A next data flow analysis pass on the Gen IR // will remove them Value *PHICopy = this->getPHICopy(PN); const ir::Register dst = this->getRegister(PHICopy); Constant *CP = dyn_cast(IV); if (CP) { GBE_ASSERT(isa(CP) == false); ConstantVector *CPV = dyn_cast(CP); if (CPV && dyn_cast(CPV) && isa(extractConstantElem(CPV, 0))) continue; const ir::ImmediateIndex immIndex = this->newImmediate(CP); const ir::Immediate imm = ctx.getImmediate(immIndex); ctx.LOADI(imm.type, dst, immIndex); } else if (regTranslator.valueExists(IV,0) || dyn_cast(IV)) { const ir::Register src = this->getRegister(IV); ctx.MOV(type, dst, src); } } } } void GenWriter::emitFunctionPrototype(Function &F) { GBE_ASSERTM(F.hasStructRetAttr() == false, "Returned value for kernel functions is forbidden"); // Loop over the arguments and output registers for them if (!F.arg_empty()) { uint32_t argID = 0; Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); // Insert a new register for each function argument #if LLVM_VERSION_MINOR <= 1 const AttrListPtr &PAL = F.getAttributes(); #endif /* LLVM_VERSION_MINOR <= 1 */ for (; I != E; ++I, ++argID) { const std::string &argName = I->getName().str(); Type *type = I->getType(); //add support for vector argument if(type->isVectorTy()) { VectorType *vectorType = cast(type); this->newRegister(I); ir::Register reg = getRegister(I, 0); Type *elemType = vectorType->getElementType(); const uint32_t elemSize = getTypeByteSize(unit, elemType); const uint32_t elemNum = vectorType->getNumElements(); //vector's elemType always scalar type ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize); ir::Function& fn = ctx.getFunction(); for(uint32_t i=1; i < elemNum; i++) { ir::PushLocation argLocation(fn, argID, elemSize*i); reg = getRegister(I, i); ctx.appendPushedConstant(reg, argLocation); //add to push map for reg alloc } continue; } GBE_ASSERTM(isScalarType(type) == true, "vector type in the function argument is not supported yet"); const ir::Register reg 
= regTranslator.newScalar(I); if (type->isPointerTy() == false) ctx.input(argName, ir::FunctionArgument::VALUE, reg, getTypeByteSize(unit, type)); else { PointerType *pointerType = dyn_cast(type); // By value structure #if LLVM_VERSION_MINOR <= 1 if (PAL.paramHasAttr(argID+1, Attribute::ByVal)) { #else if (I->hasByValAttr()) { #endif /* LLVM_VERSION_MINOR <= 1 */ Type *pointed = pointerType->getElementType(); const size_t structSize = getTypeByteSize(unit, pointed); ctx.input(argName, ir::FunctionArgument::STRUCTURE, reg, structSize); } // Regular user provided pointer (global, local or constant) else { const uint32_t addr = pointerType->getAddressSpace(); const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(addr); const uint32_t ptrSize = getTypeByteSize(unit, type); switch (addrSpace) { case ir::MEM_GLOBAL: ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, ptrSize); break; case ir::MEM_LOCAL: ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg, ptrSize); ctx.getFunction().setUseSLM(true); break; case ir::MEM_CONSTANT: ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, ptrSize); break; case ir::IMAGE: ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize); ctx.getFunction().getImageSet()->append(reg, &ctx); break; default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE); } } } } } // When returning a structure, first input register is the pointer to the // structure #if GBE_DEBUG const Type *type = F.getReturnType(); GBE_ASSERTM(type->isVoidTy() == true, "Returned value for kernel functions is forbidden"); // Variable number of arguments is not supported FunctionType *FT = cast(F.getFunctionType()); GBE_ASSERT(FT->isVarArg() == false); #endif /* GBE_DEBUG */ } static inline bool isFPIntBitCast(const Instruction &I) { if (!isa(I)) return false; Type *SrcTy = I.getOperand(0)->getType(); Type *DstTy = I.getType(); return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } 
/*! To track last read and write of the registers */ struct RegInfoForMov { ir::Instruction *lastWriteInsn; ir::Instruction *lastReadInsn; uint32_t lastWrite; uint32_t lastRead; }; /*! Replace register "from" by register "to" in the destination(s) */ static void replaceDst(ir::Instruction *insn, ir::Register from, ir::Register to) { const uint32_t dstNum = insn->getDstNum(); for (uint32_t dstID = 0; dstID < dstNum; ++dstID) if (insn->getDst(dstID) == from) insn->setDst(dstID, to); } /*! Replace register "from" by register "to" in the source(s) */ static void replaceSrc(ir::Instruction *insn, ir::Register from, ir::Register to) { const uint32_t srcNum = insn->getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) if (insn->getSrc(srcID) == from) insn->setSrc(srcID, to); } /*! lastUse maintains data about last uses (reads/writes) for each * ir::Register */ static void buildRegInfo(ir::BasicBlock &bb, vector &lastUse) { // Clear the register usages for (auto &x : lastUse) { x.lastWrite = x.lastRead = 0; x.lastWriteInsn = x.lastReadInsn = NULL; } // Find use intervals for all registers (distinguish sources and // destinations) uint32_t insnID = 2; bb.foreach([&](ir::Instruction &insn) { const uint32_t dstNum = insn.getDstNum(); const uint32_t srcNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register reg = insn.getSrc(srcID); lastUse[reg].lastRead = insnID; lastUse[reg].lastReadInsn = &insn; } for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const ir::Register reg = insn.getDst(dstID); lastUse[reg].lastWrite = insnID+1; lastUse[reg].lastWriteInsn = &insn; } insnID+=2; }); } void GenWriter::removeMOVs(const ir::Liveness &liveness, ir::Function &fn) { // We store the last write and last read for each register const uint32_t regNum = fn.regNum(); vector lastUse; lastUse.resize(regNum); // Remove the MOVs per block (local analysis only) Note that we do not try // to remove MOV for variables that outlives the block. 
So we use liveness // information to figure out which variable is alive fn.foreachBlock([&](ir::BasicBlock &bb) { // We need to know when each register will be read or written buildRegInfo(bb, lastUse); // Liveinfo helps us to know if the source outlives the block const ir::Liveness::BlockInfo &info = liveness.getBlockInfo(&bb); auto it = --bb.end(); if (it->isMemberOf() == true) --it; for (auto it = --bb.end(); it != bb.end();) { ir::Instruction *insn = &*it; it--; const ir::Opcode op = insn->getOpcode(); if (op == ir::OP_MOV) { const ir::Register dst = insn->getDst(0); const ir::Register src = insn->getSrc(0); // Outlives the block. We do not do anything if (info.inLiveOut(src)) continue; const RegInfoForMov &dstInfo = lastUse[dst]; const RegInfoForMov &srcInfo = lastUse[src]; // The source is not computed in this block if (srcInfo.lastWrite == 0) continue; // dst is read after src is written. We cannot overwrite dst if (dstInfo.lastRead > srcInfo.lastWrite) continue; // We are good. We first patch the destination then all the sources replaceDst(srcInfo.lastWriteInsn, src, dst); // Then we patch all subsequent uses of the source ir::Instruction *next = static_cast(srcInfo.lastWriteInsn->next); while (next != insn) { replaceSrc(next, src, dst); next = static_cast(next->next); } insn->remove(); } else if (op == ir::OP_LOADI) continue; else break; } }); } void GenWriter::removeLOADIs(const ir::Liveness &liveness, ir::Function &fn) { // We store the last write and last read for each register const uint32_t regNum = fn.regNum(); vector lastUse; lastUse.resize(regNum); // Traverse all blocks and remove redundant immediates. 
Do *not* remove // immediates that outlive the block fn.foreachBlock([&](ir::BasicBlock &bb) { // Each immediate that is already loaded in the block map loadedImm; // Immediate to immediate translation map immTranslate; // Liveinfo helps us to know if the loaded immediate outlives the block const ir::Liveness::BlockInfo &info = liveness.getBlockInfo(&bb); // We need to know when each register will be read or written buildRegInfo(bb, lastUse); // Top bottom traversal -> remove useless LOADIs uint32_t insnID = 2; bb.foreach([&](ir::Instruction &insn) { // We either try to remove the LOADI or we will try to use it as a // replacement for the next same LOADIs if (insn.isMemberOf()) { ir::LoadImmInstruction &loadImm = cast(insn); const ir::Immediate imm = loadImm.getImmediate(); const ir::Register dst = loadImm.getDst(0); // Not here: cool, we put it in the map if the register is not // overwritten. If it is, we just ignore it for simplicity. Note that // it should not happen with the way we "unSSA" the code auto it = loadedImm.find(imm); auto end = loadedImm.end(); if (it == end && lastUse[dst].lastWrite == insnID+1) loadedImm.insert(std::make_pair(imm, dst)); // We already pushed the same immediate and we do not outlive the // block. 
We are good to replace this immediate by the previous one else if (it != end && info.inLiveOut(dst) == false) { immTranslate.insert(std::make_pair(dst, it->second)); insn.remove(); } } // Traverse all the destinations and sources and perform the // substitutions (if any) else { const uint32_t srcNum = insn.getSrcNum(); const uint32_t dstNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register src = insn.getSrc(srcID); auto it = immTranslate.find(src); if (it != immTranslate.end()) insn.setSrc(srcID, it->second); } for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { const ir::Register dst = insn.getSrc(dstID); auto it = immTranslate.find(dst); if (it != immTranslate.end()) insn.setDst(dstID, it->second); } } insnID += 2; }); }); } BVAR(OCL_OPTIMIZE_PHI_MOVES, true); BVAR(OCL_OPTIMIZE_LOADI, true); void GenWriter::allocateGlobalVariableRegister(Function &F) { // Allocate a address register for each global variable const Module::GlobalListType &globalList = TheModule->getGlobalList(); size_t j = 0; for(auto i = globalList.begin(); i != globalList.end(); i ++) { const GlobalVariable &v = *i; if(!v.isConstantUsed()) continue; ir::AddressSpace addrSpace = addressSpaceLLVMToGen(v.getType()->getAddressSpace()); if(addrSpace == ir::MEM_LOCAL) { ir::Function &f = ctx.getFunction(); f.setUseSLM(true); const Constant *c = v.getInitializer(); Type *ty = c->getType(); uint32_t oldSlm = f.getSLMSize(); uint32_t align = 8 * getAlignmentByte(unit, ty); uint32_t padding = getPadding(oldSlm*8, align); f.setSLMSize(oldSlm + padding/8 + getTypeByteSize(unit, ty)); const Value * parent = cast(&v); // local variable can only be used in one kernel function. so, don't need to check its all uses. 
// loop through the Constant to find the instruction that use the global variable do { Value::const_use_iterator it = parent->use_begin(); parent = cast(*it); } while(isa(parent)); const Instruction * insn = cast(parent); const BasicBlock * bb = insn->getParent(); const Function * func = bb->getParent(); if(func != &F) continue; this->newRegister(const_cast(&v)); ir::Register reg = regTranslator.getScalar(const_cast(&v), 0); ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(oldSlm + padding/8, ir::TYPE_S32)); } else if(addrSpace == ir::MEM_CONSTANT) { GBE_ASSERT(v.hasInitializer()); this->newRegister(const_cast(&v)); ir::Register reg = regTranslator.getScalar(const_cast(&v), 0); ir::Constant &con = unit.getConstantSet().getConstant(j ++); ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32)); } else { GBE_ASSERT(0); } } } void GenWriter::emitFunction(Function &F) { switch (F.getCallingConv()) { #if LLVM_VERSION_MINOR <= 2 case CallingConv::PTX_Device: // we do not emit device function return; case CallingConv::PTX_Kernel: #else case CallingConv::C: #endif break; default: GBE_ASSERTM(false, "Unsupported calling convention"); } ctx.startFunction(F.getName()); this->regTranslator.clear(); this->labelMap.clear(); this->emitFunctionPrototype(F); this->allocateGlobalVariableRegister(F); // Visit all the instructions and emit the IR registers or the value to // value mapping when a new register is not needed pass = PASS_EMIT_REGISTERS; for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) visit(*I); // First create all the labels (one per block) ... for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) this->newLabelIndex(BB); // Then, for all branch instructions that have conditions, see if we can // simplify the code by inverting condition code for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) this->simplifyTerminator(BB); // ... 
then, emit the instructions for all basic blocks pass = PASS_EMIT_INSTRUCTIONS; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) emitBasicBlock(BB); ir::Function &fn = ctx.getFunction(); ctx.endFunction(); // Liveness can be shared when we optimized the immediates and the MOVs const ir::Liveness liveness(fn); if (OCL_OPTIMIZE_LOADI) this->removeLOADIs(liveness, fn); if (OCL_OPTIMIZE_PHI_MOVES) this->removeMOVs(liveness, fn); } void GenWriter::regAllocateReturnInst(ReturnInst &I) {} void GenWriter::emitReturnInst(ReturnInst &I) { const ir::Function &fn = ctx.getFunction(); GBE_ASSERTM(fn.outputNum() <= 1, "no more than one value can be returned"); if (fn.outputNum() == 1 && I.getNumOperands() > 0) { const ir::Register dst = fn.getOutput(0); const ir::Register src = this->getRegister(I.getOperand(0)); const ir::RegisterFamily family = fn.getRegisterFamily(dst); ctx.MOV(ir::getType(family), dst, src); } ctx.RET(); } void GenWriter::regAllocateBinaryOperator(Instruction &I) { this->newRegister(&I); } void GenWriter::emitBinaryOperator(Instruction &I) { #if GBE_DEBUG GBE_ASSERT(I.getType()->isPointerTy() == false); // We accept logical operations on booleans switch (I.getOpcode()) { case Instruction::And: case Instruction::Or: case Instruction::Xor: break; default: GBE_ASSERT(I.getType() != Type::getInt1Ty(I.getContext())); } #endif /* GBE_DEBUG */ // Get the element type for a vector const ir::Type type = getType(ctx, I.getType()); // Emit the instructions in a row const ir::Register dst = this->getRegister(&I); const ir::Register src0 = this->getRegister(I.getOperand(0)); const ir::Register src1 = this->getRegister(I.getOperand(1)); switch (I.getOpcode()) { case Instruction::Add: case Instruction::FAdd: ctx.ADD(type, dst, src0, src1); break; case Instruction::Sub: case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break; case Instruction::Mul: case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break; case Instruction::URem: 
ctx.REM(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; case Instruction::SRem: case Instruction::FRem: ctx.REM(type, dst, src0, src1); break; case Instruction::UDiv: ctx.DIV(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; case Instruction::SDiv: case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break; case Instruction::And: ctx.AND(type, dst, src0, src1); break; case Instruction::Or: ctx.OR(type, dst, src0, src1); break; case Instruction::Xor: ctx.XOR(type, dst, src0, src1); break; case Instruction::Shl: ctx.SHL(type, dst, src0, src1); break; case Instruction::LShr: ctx.SHR(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break; default: NOT_SUPPORTED; } } void GenWriter::regAllocateICmpInst(ICmpInst &I) { this->newRegister(&I); } static ir::Type makeTypeSigned(const ir::Type &type) { if (type == ir::TYPE_U8) return ir::TYPE_S8; else if (type == ir::TYPE_U16) return ir::TYPE_S16; else if (type == ir::TYPE_U32) return ir::TYPE_S32; else if (type == ir::TYPE_U64) return ir::TYPE_S64; return type; } static ir::Type makeTypeUnsigned(const ir::Type &type) { if (type == ir::TYPE_S8) return ir::TYPE_U8; else if (type == ir::TYPE_S16) return ir::TYPE_U16; else if (type == ir::TYPE_S32) return ir::TYPE_U32; else if (type == ir::TYPE_S64) return ir::TYPE_U64; return type; } void GenWriter::emitICmpInst(ICmpInst &I) { GBE_ASSERT(I.getOperand(0)->getType() != Type::getInt1Ty(I.getContext())); // Get the element type and the number of elements Type *operandType = I.getOperand(0)->getType(); const ir::Type type = getType(ctx, operandType); const ir::Type signedType = makeTypeSigned(type); const ir::Type unsignedType = makeTypeUnsigned(type); // Emit the instructions in a row const ir::Register dst = this->getRegister(&I); const ir::Register src0 = this->getRegister(I.getOperand(0)); const ir::Register src1 = this->getRegister(I.getOperand(1)); // We must invert the condition to 
simplify the branch code if (conditionSet.find(&I) != conditionSet.end()) { switch (I.getPredicate()) { case ICmpInst::ICMP_EQ: ctx.NE(type, dst, src0, src1); break; case ICmpInst::ICMP_NE: ctx.EQ(type, dst, src0, src1); break; case ICmpInst::ICMP_ULE: ctx.GT((unsignedType), dst, src0, src1); break; case ICmpInst::ICMP_SLE: ctx.GT(signedType, dst, src0, src1); break; case ICmpInst::ICMP_UGE: ctx.LT(unsignedType, dst, src0, src1); break; case ICmpInst::ICMP_SGE: ctx.LT(signedType, dst, src0, src1); break; case ICmpInst::ICMP_ULT: ctx.GE(unsignedType, dst, src0, src1); break; case ICmpInst::ICMP_SLT: ctx.GE(signedType, dst, src0, src1); break; case ICmpInst::ICMP_UGT: ctx.LE(unsignedType, dst, src0, src1); break; case ICmpInst::ICMP_SGT: ctx.LE(signedType, dst, src0, src1); break; default: NOT_SUPPORTED; } } // Nothing special to do else { switch (I.getPredicate()) { case ICmpInst::ICMP_EQ: ctx.EQ(type, dst, src0, src1); break; case ICmpInst::ICMP_NE: ctx.NE(type, dst, src0, src1); break; case ICmpInst::ICMP_ULE: ctx.LE((unsignedType), dst, src0, src1); break; case ICmpInst::ICMP_SLE: ctx.LE(signedType, dst, src0, src1); break; case ICmpInst::ICMP_UGE: ctx.GE(unsignedType, dst, src0, src1); break; case ICmpInst::ICMP_SGE: ctx.GE(signedType, dst, src0, src1); break; case ICmpInst::ICMP_ULT: ctx.LT(unsignedType, dst, src0, src1); break; case ICmpInst::ICMP_SLT: ctx.LT(signedType, dst, src0, src1); break; case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break; case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break; default: NOT_SUPPORTED; } } } void GenWriter::regAllocateFCmpInst(FCmpInst &I) { this->newRegister(&I); } void GenWriter::emitFCmpInst(FCmpInst &I) { // Get the element type and the number of elements Type *operandType = I.getOperand(0)->getType(); const ir::Type type = getType(ctx, operandType); // Emit the instructions in a row const ir::Register dst = this->getRegister(&I); const ir::Register src0 = 
this->getRegister(I.getOperand(0)); const ir::Register src1 = this->getRegister(I.getOperand(1)); switch (I.getPredicate()) { case ICmpInst::FCMP_OEQ: case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break; case ICmpInst::FCMP_ONE: case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break; case ICmpInst::FCMP_OLE: case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break; case ICmpInst::FCMP_OGE: case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break; case ICmpInst::FCMP_OLT: case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break; case ICmpInst::FCMP_OGT: case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break; default: NOT_SUPPORTED; } } void GenWriter::regAllocateCastInst(CastInst &I) { Value *dstValue = &I; Value *srcValue = I.getOperand(0); const auto op = I.getOpcode(); switch (op) { // When casting pointer to integers, be aware with integers case Instruction::PtrToInt: case Instruction::IntToPtr: { Constant *CPV = dyn_cast(srcValue); if (CPV == NULL) { #if GBE_DEBUG Type *dstType = dstValue->getType(); Type *srcType = srcValue->getType(); GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType)); #endif /* GBE_DEBUG */ regTranslator.newValueProxy(srcValue, dstValue); } else this->newRegister(dstValue); } break; // Bitcast just forward registers case Instruction::BitCast: { regTranslator.newValueProxy(srcValue, dstValue); } break; // Various conversion operations -> just allocate registers for them case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::SExt: case Instruction::ZExt: case Instruction::FPExt: case Instruction::FPTrunc: case Instruction::Trunc: this->newRegister(&I); break; default: NOT_SUPPORTED; } } void GenWriter::emitCastInst(CastInst &I) { switch (I.getOpcode()) { case Instruction::PtrToInt: case Instruction::IntToPtr: { Value *dstValue = &I; Value *srcValue = I.getOperand(0); Constant *CPV = dyn_cast(srcValue); if (CPV != 
NULL) { const ir::ImmediateIndex index = ctx.newImmediate(CPV); const ir::Immediate imm = ctx.getImmediate(index); const ir::Register reg = this->getRegister(dstValue); ctx.LOADI(imm.type, reg, index); } } break; case Instruction::BitCast: break; // nothing to emit here case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::SExt: case Instruction::ZExt: case Instruction::FPExt: case Instruction::FPTrunc: case Instruction::Trunc: { // Get the element type for a vector Type *llvmDstType = I.getType(); Type *llvmSrcType = I.getOperand(0)->getType(); const ir::Type dstType = getType(ctx, llvmDstType); ir::Type srcType; if (I.getOpcode() == Instruction::ZExt || I.getOpcode() == Instruction::UIToFP) { srcType = getUnsignedType(ctx, llvmSrcType); } else { srcType = getType(ctx, llvmSrcType); } // We use a select (0,1) not a convert when the destination is a boolean if (srcType == ir::TYPE_BOOL) { const ir::RegisterFamily family = getFamily(dstType); const ir::ImmediateIndex zero = ctx.newIntegerImmediate(0, dstType); ir::ImmediateIndex one; if (I.getOpcode() == Instruction::SExt && (dstType == ir::TYPE_S8 || dstType == ir::TYPE_S16 || dstType == ir::TYPE_S32)) one = ctx.newIntegerImmediate(-1, dstType); else one = ctx.newIntegerImmediate(1, dstType); const ir::Register zeroReg = ctx.reg(family); const ir::Register oneReg = ctx.reg(family); ctx.LOADI(dstType, zeroReg, zero); ctx.LOADI(dstType, oneReg, one); const ir::Register dst = this->getRegister(&I); const ir::Register src = this->getRegister(I.getOperand(0)); ctx.SEL(dstType, dst, src, oneReg, zeroReg); } // Use a convert for the other cases else { const ir::Register dst = this->getRegister(&I); const ir::Register src = this->getRegister(I.getOperand(0)); ctx.CVT(dstType, srcType, dst, src); } } break; default: NOT_SUPPORTED; } } /*! Once again, it is a templated functor. 
No lambda */ struct InsertExtractFunctor { InsertExtractFunctor(ir::Context &ctx) : ctx(ctx) {} template ir::Immediate operator() (const T &t) { return ir::Immediate(t); } ir::Context &ctx; }; /*! Because there are still fake insert/extract instruction for * load/store, so keep empty function here */ void GenWriter::regAllocateInsertElement(InsertElementInst &I) {} void GenWriter::emitInsertElement(InsertElementInst &I) { const VectorType *type = dyn_cast(I.getType()); GBE_ASSERT(type); const int elemNum = type->getNumElements(); Value *vec = I.getOperand(0); Value *value = I.getOperand(1); const Value *index = I.getOperand(2); const ConstantInt *c = dyn_cast(index); int i = c->getValue().getSExtValue(); for(int j=0; j(index); GBE_ASSERT(c); int i = c->getValue().getSExtValue(); regTranslator.newValueProxy(vec, &I, i, 0); } void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {} void GenWriter::emitShuffleVectorInst(ShuffleVectorInst &I) {} void GenWriter::regAllocateSelectInst(SelectInst &I) { this->newRegister(&I); } void GenWriter::emitSelectInst(SelectInst &I) { // Get the element type for a vector const ir::Type type = getType(ctx, I.getType()); // Emit the instructions in a row const ir::Register dst = this->getRegister(&I); const ir::Register cond = this->getRegister(I.getOperand(0)); const ir::Register src0 = this->getRegister(I.getOperand(1)); const ir::Register src1 = this->getRegister(I.getOperand(2)); ctx.SEL(type, dst, cond, src0, src1); } void GenWriter::regAllocatePHINode(PHINode &I) { // Copy 1 for the PHI this->newRegister(&I); // Copy 2 to avoid lost copy issue Value *copy = this->getPHICopy(&I); this->newRegister(&I, copy); } void GenWriter::emitPHINode(PHINode &I) { Value *copy = this->getPHICopy(&I); const ir::Type type = getType(ctx, I.getType()); const ir::Register dst = this->getRegister(&I); const ir::Register src = this->getRegister(copy); ctx.MOV(type, dst, src); } void GenWriter::regAllocateBranchInst(BranchInst &I) {} void 
GenWriter::emitBranchInst(BranchInst &I) { // Emit MOVs if required BasicBlock *bb = I.getParent(); this->emitMovForPHI(bb, I.getSuccessor(0)); if (I.isConditional()) this->emitMovForPHI(bb, I.getSuccessor(1)); // Inconditional branch. Just check that we jump to a block which is not our // successor if (I.isConditional() == false) { BasicBlock *target = I.getSuccessor(0); if (llvm::next(Function::iterator(bb)) != Function::iterator(target)) { GBE_ASSERT(labelMap.find(target) != labelMap.end()); const ir::LabelIndex labelIndex = labelMap[target]; ctx.BRA(labelIndex); } } // The LLVM branch has two targets else { BasicBlock *taken = NULL, *nonTaken = NULL; Value *condition = I.getCondition(); // We may inverted the branch condition to simplify the branching code const bool inverted = conditionSet.find(condition) != conditionSet.end(); taken = inverted ? I.getSuccessor(1) : I.getSuccessor(0); nonTaken = inverted ? I.getSuccessor(0) : I.getSuccessor(1); // Get both taken label and predicate register GBE_ASSERT(labelMap.find(taken) != labelMap.end()); const ir::LabelIndex index = labelMap[taken]; const ir::Register reg = this->getRegister(condition); ctx.BRA(index, reg); // If non-taken target is the next block, there is nothing to do BasicBlock *bb = I.getParent(); if (llvm::next(Function::iterator(bb)) == Function::iterator(nonTaken)) return; // This is slightly more complicated here. We need to issue one more // branch for the non-taken condition. 
GBE_ASSERT(labelMap.find(nonTaken) != labelMap.end()); const ir::LabelIndex untakenIndex = ctx.label(); ctx.LABEL(untakenIndex); ctx.BRA(labelMap[nonTaken]); } } void GenWriter::regAllocateCallInst(CallInst &I) { Value *dst = &I; Value *Callee = I.getCalledValue(); GBE_ASSERT(ctx.getFunction().getProfile() == ir::PROFILE_OCL); GBE_ASSERT(isa(I.getCalledValue()) == false); GBE_ASSERT(I.hasStructRetAttr() == false); // We only support a small number of intrinsics right now if (Function *F = I.getCalledFunction()) { const Intrinsic::ID intrinsicID = (Intrinsic::ID) F->getIntrinsicID(); if (intrinsicID != 0) { switch (F->getIntrinsicID()) { case Intrinsic::stacksave: this->newRegister(&I); break; case Intrinsic::stackrestore: break; #if LLVM_VERSION_MINOR >= 2 case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: break; case Intrinsic::fmuladd: this->newRegister(&I); break; #endif /* LLVM_VERSION_MINOR >= 2 */ default: GBE_ASSERTM(false, "Unsupported intrinsics"); } return; } } // Get the name of the called function and handle it const std::string fnName = Callee->getName(); auto it = instrinsicMap.map.find(fnName); GBE_ASSERT(it != instrinsicMap.map.end()); switch (it->second) { case GEN_OCL_GET_GROUP_ID0: regTranslator.newScalarProxy(ir::ocl::groupid0, dst); break; case GEN_OCL_GET_GROUP_ID1: regTranslator.newScalarProxy(ir::ocl::groupid1, dst); break; case GEN_OCL_GET_GROUP_ID2: regTranslator.newScalarProxy(ir::ocl::groupid2, dst); break; case GEN_OCL_GET_LOCAL_ID0: regTranslator.newScalarProxy(ir::ocl::lid0, dst); break; case GEN_OCL_GET_LOCAL_ID1: regTranslator.newScalarProxy(ir::ocl::lid1, dst); break; case GEN_OCL_GET_LOCAL_ID2: regTranslator.newScalarProxy(ir::ocl::lid2, dst); break; case GEN_OCL_GET_NUM_GROUPS0: regTranslator.newScalarProxy(ir::ocl::numgroup0, dst); break; case GEN_OCL_GET_NUM_GROUPS1: regTranslator.newScalarProxy(ir::ocl::numgroup1, dst); break; case GEN_OCL_GET_NUM_GROUPS2: regTranslator.newScalarProxy(ir::ocl::numgroup2, dst); 
break; case GEN_OCL_GET_LOCAL_SIZE0: regTranslator.newScalarProxy(ir::ocl::lsize0, dst); break; case GEN_OCL_GET_LOCAL_SIZE1: regTranslator.newScalarProxy(ir::ocl::lsize1, dst); break; case GEN_OCL_GET_LOCAL_SIZE2: regTranslator.newScalarProxy(ir::ocl::lsize2, dst); break; case GEN_OCL_GET_GLOBAL_SIZE0: regTranslator.newScalarProxy(ir::ocl::gsize0, dst); break; case GEN_OCL_GET_GLOBAL_SIZE1: regTranslator.newScalarProxy(ir::ocl::gsize1, dst); break; case GEN_OCL_GET_GLOBAL_SIZE2: regTranslator.newScalarProxy(ir::ocl::gsize2, dst); break; case GEN_OCL_GET_GLOBAL_OFFSET0: regTranslator.newScalarProxy(ir::ocl::goffset0, dst); break; case GEN_OCL_GET_GLOBAL_OFFSET1: regTranslator.newScalarProxy(ir::ocl::goffset1, dst); break; case GEN_OCL_GET_GLOBAL_OFFSET2: regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break; case GEN_OCL_GET_WORK_DIM: regTranslator.newScalarProxy(ir::ocl::workdim, dst); break; case GEN_OCL_FBH: case GEN_OCL_FBL: case GEN_OCL_COS: case GEN_OCL_SIN: case GEN_OCL_SQR: case GEN_OCL_RSQ: case GEN_OCL_LOG: case GEN_OCL_POW: case GEN_OCL_RCP: case GEN_OCL_ABS: case GEN_OCL_FABS: case GEN_OCL_RNDZ: case GEN_OCL_RNDE: case GEN_OCL_RNDU: case GEN_OCL_RNDD: case GEN_OCL_GET_IMAGE_WIDTH: case GEN_OCL_GET_IMAGE_HEIGHT: case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE: case GEN_OCL_GET_IMAGE_CHANNEL_ORDER: case GEN_OCL_GET_IMAGE_DEPTH: case GEN_OCL_GET_SAMPLER_INFO: case GEN_OCL_ATOMIC_ADD0: case GEN_OCL_ATOMIC_ADD1: case GEN_OCL_ATOMIC_SUB0: case GEN_OCL_ATOMIC_SUB1: case GEN_OCL_ATOMIC_AND0: case GEN_OCL_ATOMIC_AND1: case GEN_OCL_ATOMIC_OR0: case GEN_OCL_ATOMIC_OR1: case GEN_OCL_ATOMIC_XOR0: case GEN_OCL_ATOMIC_XOR1: case GEN_OCL_ATOMIC_XCHG0: case GEN_OCL_ATOMIC_XCHG1: case GEN_OCL_ATOMIC_UMAX0: case GEN_OCL_ATOMIC_UMAX1: case GEN_OCL_ATOMIC_UMIN0: case GEN_OCL_ATOMIC_UMIN1: case GEN_OCL_ATOMIC_IMAX0: case GEN_OCL_ATOMIC_IMAX1: case GEN_OCL_ATOMIC_IMIN0: case GEN_OCL_ATOMIC_IMIN1: case GEN_OCL_ATOMIC_INC0: case GEN_OCL_ATOMIC_INC1: case GEN_OCL_ATOMIC_DEC0: 
case GEN_OCL_ATOMIC_DEC1: case GEN_OCL_ATOMIC_CMPXCHG0: case GEN_OCL_ATOMIC_CMPXCHG1: // No structure can be returned this->newRegister(&I); break; case GEN_OCL_FORCE_SIMD8: case GEN_OCL_FORCE_SIMD16: case GEN_OCL_LBARRIER: case GEN_OCL_GBARRIER: case GEN_OCL_LGBARRIER: ctx.getFunction().setUseSLM(true); break; case GEN_OCL_WRITE_IMAGE0: case GEN_OCL_WRITE_IMAGE1: case GEN_OCL_WRITE_IMAGE2: case GEN_OCL_WRITE_IMAGE3: case GEN_OCL_WRITE_IMAGE4: case GEN_OCL_WRITE_IMAGE5: case GEN_OCL_WRITE_IMAGE10: case GEN_OCL_WRITE_IMAGE11: case GEN_OCL_WRITE_IMAGE12: case GEN_OCL_WRITE_IMAGE13: case GEN_OCL_WRITE_IMAGE14: case GEN_OCL_WRITE_IMAGE15: break; case GEN_OCL_READ_IMAGE0: case GEN_OCL_READ_IMAGE1: case GEN_OCL_READ_IMAGE2: case GEN_OCL_READ_IMAGE3: case GEN_OCL_READ_IMAGE4: case GEN_OCL_READ_IMAGE5: case GEN_OCL_READ_IMAGE10: case GEN_OCL_READ_IMAGE11: case GEN_OCL_READ_IMAGE12: case GEN_OCL_READ_IMAGE13: case GEN_OCL_READ_IMAGE14: case GEN_OCL_READ_IMAGE15: { // dst is a 4 elements vector. We allocate all 4 registers here. 
uint32_t elemNum; (void)getVectorInfo(ctx, I.getType(), &I, elemNum); GBE_ASSERT(elemNum == 4); this->newRegister(&I); break; } case GEN_OCL_MUL_HI_INT: case GEN_OCL_MUL_HI_UINT: case GEN_OCL_MUL_HI_I64: case GEN_OCL_MUL_HI_UI64: case GEN_OCL_UPSAMPLE_SHORT: case GEN_OCL_UPSAMPLE_INT: case GEN_OCL_UPSAMPLE_LONG: case GEN_OCL_SADD_SAT_CHAR: case GEN_OCL_SADD_SAT_SHORT: case GEN_OCL_SADD_SAT_INT: case GEN_OCL_SADD_SAT_LONG: case GEN_OCL_UADD_SAT_CHAR: case GEN_OCL_UADD_SAT_SHORT: case GEN_OCL_UADD_SAT_INT: case GEN_OCL_UADD_SAT_LONG: case GEN_OCL_SSUB_SAT_CHAR: case GEN_OCL_SSUB_SAT_SHORT: case GEN_OCL_SSUB_SAT_INT: case GEN_OCL_SSUB_SAT_LONG: case GEN_OCL_USUB_SAT_CHAR: case GEN_OCL_USUB_SAT_SHORT: case GEN_OCL_USUB_SAT_INT: case GEN_OCL_USUB_SAT_LONG: case GEN_OCL_HADD: case GEN_OCL_RHADD: case GEN_OCL_I64HADD: case GEN_OCL_I64RHADD: case GEN_OCL_I64_MAD_SAT: case GEN_OCL_I64_MAD_SATU: case GEN_OCL_SAT_CONV_U8_TO_I8: case GEN_OCL_SAT_CONV_I16_TO_I8: case GEN_OCL_SAT_CONV_U16_TO_I8: case GEN_OCL_SAT_CONV_I32_TO_I8: case GEN_OCL_SAT_CONV_U32_TO_I8: case GEN_OCL_SAT_CONV_F32_TO_I8: case GEN_OCL_SAT_CONV_I8_TO_U8: case GEN_OCL_SAT_CONV_I16_TO_U8: case GEN_OCL_SAT_CONV_U16_TO_U8: case GEN_OCL_SAT_CONV_I32_TO_U8: case GEN_OCL_SAT_CONV_U32_TO_U8: case GEN_OCL_SAT_CONV_F32_TO_U8: case GEN_OCL_SAT_CONV_U16_TO_I16: case GEN_OCL_SAT_CONV_I32_TO_I16: case GEN_OCL_SAT_CONV_U32_TO_I16: case GEN_OCL_SAT_CONV_F32_TO_I16: case GEN_OCL_SAT_CONV_I16_TO_U16: case GEN_OCL_SAT_CONV_I32_TO_U16: case GEN_OCL_SAT_CONV_U32_TO_U16: case GEN_OCL_SAT_CONV_F32_TO_U16: case GEN_OCL_SAT_CONV_U32_TO_I32: case GEN_OCL_SAT_CONV_F32_TO_I32: case GEN_OCL_SAT_CONV_I32_TO_U32: case GEN_OCL_SAT_CONV_F32_TO_U32: this->newRegister(&I); break; default: GBE_ASSERTM(false, "Function call are not supported yet"); }; } struct U64CPVExtractFunctor { U64CPVExtractFunctor(ir::Context &ctx) : ctx(ctx) {} template INLINE uint64_t operator() (const T &t) { return uint64_t(t); } ir::Context &ctx; }; void 
GenWriter::emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode) { CallSite::arg_iterator AI = CS.arg_begin(); #if GBE_DEBUG CallSite::arg_iterator AE = CS.arg_end(); #endif /* GBE_DEBUG */ GBE_ASSERT(AI != AE); const ir::Register src = this->getRegister(*AI); const ir::Register dst = this->getRegister(&I); ctx.ALU1(opcode, ir::TYPE_FLOAT, dst, src); } void GenWriter::emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode) { CallSite::arg_iterator AI = CS.arg_begin(); CallSite::arg_iterator AE = CS.arg_end(); GBE_ASSERT(AI != AE); unsigned int llvmSpace = (*AI)->getType()->getPointerAddressSpace(); const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace); const ir::Register dst = this->getRegister(&I); vector src; uint32_t srcNum = 0; while(AI != AE) { src.push_back(this->getRegister(*(AI++))); srcNum++; } const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], srcNum); ctx.ATOMIC(opcode, dst, addrSpace, srcTuple); } /* append a new sampler. should be called before any reference to * a sampler_t value. */ ir::Register GenWriter::appendSampler(CallSite::arg_iterator AI) { Constant *CPV = dyn_cast(*AI); ir::Register sampler; if (CPV != NULL) { // This is not a kernel argument sampler, we need to append it to sampler set, // and allocate a sampler slot for it. 
auto x = processConstant(CPV, InsertExtractFunctor(ctx)); GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type"); sampler = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx); } else { sampler = this->getRegister(*AI); ctx.getFunction().getSamplerSet()->append(sampler, &ctx); } return sampler; } void GenWriter::emitCallInst(CallInst &I) { if (Function *F = I.getCalledFunction()) { if (F->getIntrinsicID() != 0) { const ir::Function &fn = ctx.getFunction(); switch (F->getIntrinsicID()) { case Intrinsic::stacksave: { const ir::Register dst = this->getRegister(&I); const ir::Register src = ir::ocl::stackptr; const ir::RegisterFamily family = fn.getRegisterFamily(dst); ctx.MOV(ir::getType(family), dst, src); } break; case Intrinsic::stackrestore: { const ir::Register dst = ir::ocl::stackptr; const ir::Register src = this->getRegister(I.getOperand(0)); const ir::RegisterFamily family = fn.getRegisterFamily(dst); ctx.MOV(ir::getType(family), dst, src); } break; #if LLVM_VERSION_MINOR >= 2 case Intrinsic::fmuladd: { const ir::Register tmp = ctx.reg(ir::FAMILY_DWORD); const ir::Register dst = this->getRegister(&I); const ir::Register src0 = this->getRegister(I.getOperand(0)); const ir::Register src1 = this->getRegister(I.getOperand(1)); const ir::Register src2 = this->getRegister(I.getOperand(2)); ctx.MUL(ir::TYPE_FLOAT, tmp, src0, src1); ctx.ADD(ir::TYPE_FLOAT, dst, tmp, src2); break; } break; case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: break; #endif /* LLVM_VERSION_MINOR >= 2 */ default: NOT_IMPLEMENTED; } } else { // Get the name of the called function and handle it Value *Callee = I.getCalledValue(); const std::string fnName = Callee->getName(); auto it = instrinsicMap.map.find(fnName); GBE_ASSERT(it != instrinsicMap.map.end()); // Get the function arguments CallSite CS(&I); CallSite::arg_iterator AI = CS.arg_begin(); #if GBE_DEBUG CallSite::arg_iterator AE = CS.arg_end(); #endif /* GBE_DEBUG */ switch 
(it->second) { case GEN_OCL_POW: { const ir::Register src0 = this->getRegister(*AI); ++AI; const ir::Register src1 = this->getRegister(*AI); const ir::Register dst = this->getRegister(&I); ctx.POW(ir::TYPE_FLOAT, dst, src0, src1); break; } case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break; case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break; case GEN_OCL_ABS: { const ir::Register src = this->getRegister(*AI); const ir::Register dst = this->getRegister(&I); ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src); break; } case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break; case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break; case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break; case GEN_OCL_FABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break; case GEN_OCL_RNDZ: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break; case GEN_OCL_RNDE: this->emitUnaryCallInst(I,CS,ir::OP_RNDE); break; case GEN_OCL_RNDU: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break; case GEN_OCL_RNDD: this->emitUnaryCallInst(I,CS,ir::OP_RNDD); break; case GEN_OCL_FORCE_SIMD8: ctx.setSimdWidth(8); break; case GEN_OCL_FORCE_SIMD16: ctx.setSimdWidth(16); break; case GEN_OCL_LBARRIER: ctx.SYNC(ir::syncLocalBarrier); break; case GEN_OCL_GBARRIER: ctx.SYNC(ir::syncGlobalBarrier); break; case GEN_OCL_LGBARRIER: ctx.SYNC(ir::syncLocalBarrier | ir::syncGlobalBarrier); break; case GEN_OCL_ATOMIC_ADD0: case GEN_OCL_ATOMIC_ADD1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_ADD); break; case GEN_OCL_ATOMIC_SUB0: case GEN_OCL_ATOMIC_SUB1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_SUB); break; case GEN_OCL_ATOMIC_AND0: case GEN_OCL_ATOMIC_AND1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_AND); break; case GEN_OCL_ATOMIC_OR0: case GEN_OCL_ATOMIC_OR1: 
this->emitAtomicInst(I,CS,ir::ATOMIC_OP_OR); break; case GEN_OCL_ATOMIC_XOR0: case GEN_OCL_ATOMIC_XOR1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_XOR); break; case GEN_OCL_ATOMIC_XCHG0: case GEN_OCL_ATOMIC_XCHG1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_XCHG); break; case GEN_OCL_ATOMIC_INC0: case GEN_OCL_ATOMIC_INC1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_INC); break; case GEN_OCL_ATOMIC_DEC0: case GEN_OCL_ATOMIC_DEC1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_DEC); break; case GEN_OCL_ATOMIC_UMIN0: case GEN_OCL_ATOMIC_UMIN1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_UMIN); break; case GEN_OCL_ATOMIC_UMAX0: case GEN_OCL_ATOMIC_UMAX1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_UMAX); break; case GEN_OCL_ATOMIC_IMIN0: case GEN_OCL_ATOMIC_IMIN1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_IMIN); break; case GEN_OCL_ATOMIC_IMAX0: case GEN_OCL_ATOMIC_IMAX1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_IMAX); break; case GEN_OCL_ATOMIC_CMPXCHG0: case GEN_OCL_ATOMIC_CMPXCHG1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_CMPXCHG); break; case GEN_OCL_GET_IMAGE_WIDTH: case GEN_OCL_GET_IMAGE_HEIGHT: case GEN_OCL_GET_IMAGE_DEPTH: case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE: case GEN_OCL_GET_IMAGE_CHANNEL_ORDER: { GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI; uint32_t elemNum; (void)getVectorInfo(ctx, I.getType(), &I, elemNum); const ir::Register reg = this->getRegister(&I, 0); int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH; ctx.GET_IMAGE_INFO(infoType, reg, surface_id, ctx.reg(ir::FAMILY_DWORD)); break; } case GEN_OCL_GET_SAMPLER_INFO: { GBE_ASSERT(AI != AE); const ir::Register sampler = this->appendSampler(AI); ++AI; const ir::Register reg = this->getRegister(&I, 0); ctx.GET_SAMPLER_INFO(reg, sampler); break; } case GEN_OCL_READ_IMAGE0: case GEN_OCL_READ_IMAGE1: case GEN_OCL_READ_IMAGE2: case GEN_OCL_READ_IMAGE3: case GEN_OCL_READ_IMAGE4: case GEN_OCL_READ_IMAGE5: case GEN_OCL_READ_IMAGE10: case GEN_OCL_READ_IMAGE11: case GEN_OCL_READ_IMAGE12: case 
GEN_OCL_READ_IMAGE13: case GEN_OCL_READ_IMAGE14: case GEN_OCL_READ_IMAGE15: { GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register sampler = this->appendSampler(AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI; ir::Register wcoord; if (it->second >= GEN_OCL_READ_IMAGE10 && it->second <= GEN_OCL_READ_IMAGE15) { GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; } else wcoord = ir::Register(0); vector dstTupleData, srcTupleData; const uint32_t elemNum = 4; for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { const ir::Register reg = this->getRegister(&I, elemID); dstTupleData.push_back(reg); } srcTupleData.push_back(surface_id); srcTupleData.push_back(sampler); srcTupleData.push_back(ucoord); srcTupleData.push_back(vcoord); srcTupleData.push_back(wcoord); #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast(*AI); assert(CPV); auto x = processConstant(CPV, InsertExtractFunctor(ctx)); GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type"); ir::Register offsetReg(x.data.u32); srcTupleData.push_back(offsetReg); #else ir::Register offsetReg(0); #endif srcTupleData.push_back(offsetReg); const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum); const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 6); ir::Type srcType = ir::TYPE_S32, dstType = ir::TYPE_U32; switch(it->second) { case GEN_OCL_READ_IMAGE0: case GEN_OCL_READ_IMAGE2: case GEN_OCL_READ_IMAGE10: case GEN_OCL_READ_IMAGE12: dstType = ir::TYPE_U32; srcType = ir::TYPE_S32; break; case GEN_OCL_READ_IMAGE1: case GEN_OCL_READ_IMAGE3: case GEN_OCL_READ_IMAGE11: case GEN_OCL_READ_IMAGE13: dstType = ir::TYPE_U32; srcType = ir::TYPE_FLOAT; break; case GEN_OCL_READ_IMAGE4: case GEN_OCL_READ_IMAGE14: dstType = ir::TYPE_FLOAT; srcType = 
ir::TYPE_S32; break; case GEN_OCL_READ_IMAGE5: case GEN_OCL_READ_IMAGE15: srcType = dstType = ir::TYPE_FLOAT; break; default: GBE_ASSERT(0); // never been here. } ctx.SAMPLE(dstTuple, srcTuple, dstType, srcType); break; } case GEN_OCL_WRITE_IMAGE0: case GEN_OCL_WRITE_IMAGE1: case GEN_OCL_WRITE_IMAGE2: case GEN_OCL_WRITE_IMAGE3: case GEN_OCL_WRITE_IMAGE4: case GEN_OCL_WRITE_IMAGE5: case GEN_OCL_WRITE_IMAGE10: case GEN_OCL_WRITE_IMAGE11: case GEN_OCL_WRITE_IMAGE12: case GEN_OCL_WRITE_IMAGE13: case GEN_OCL_WRITE_IMAGE14: case GEN_OCL_WRITE_IMAGE15: { GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI; ir::Register wcoord; if(it->second >= GEN_OCL_WRITE_IMAGE10 && it->second <= GEN_OCL_WRITE_IMAGE15) { GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; } else wcoord = ir::Register(0); GBE_ASSERT(AI != AE); vector srcTupleData; srcTupleData.push_back(surface_id); srcTupleData.push_back(ucoord); srcTupleData.push_back(vcoord); srcTupleData.push_back(wcoord); const uint32_t elemNum = 4; for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { const ir::Register reg = this->getRegister(*AI, elemID); srcTupleData.push_back(reg); } const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 8); ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32; switch(it->second) { case GEN_OCL_WRITE_IMAGE0: case GEN_OCL_WRITE_IMAGE2: case GEN_OCL_WRITE_IMAGE10: case GEN_OCL_WRITE_IMAGE12: srcType = coordType = ir::TYPE_U32; break; case GEN_OCL_WRITE_IMAGE1: case GEN_OCL_WRITE_IMAGE3: case GEN_OCL_WRITE_IMAGE11: case GEN_OCL_WRITE_IMAGE13: coordType = ir::TYPE_FLOAT; srcType = ir::TYPE_U32; break; case GEN_OCL_WRITE_IMAGE4: case GEN_OCL_WRITE_IMAGE14: srcType = ir::TYPE_FLOAT; coordType = ir::TYPE_U32; break; case GEN_OCL_WRITE_IMAGE5: case GEN_OCL_WRITE_IMAGE15: srcType = coordType = 
ir::TYPE_FLOAT; break; default: GBE_ASSERT(0); // never been here. } ctx.TYPED_WRITE(srcTuple, srcType, coordType); break; } case GEN_OCL_MUL_HI_INT: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.MUL_HI(getType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_MUL_HI_UINT: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_MUL_HI_I64: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.I64_MUL_HI(getType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_MUL_HI_UI64: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.I64_MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_UPSAMPLE_SHORT: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.UPSAMPLE_SHORT(getType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_UPSAMPLE_INT: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.UPSAMPLE_INT(getType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_UPSAMPLE_LONG: { GBE_ASSERT(AI != AE); 
const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.UPSAMPLE_LONG(getType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_SADD_SAT_CHAR: case GEN_OCL_SADD_SAT_SHORT: case GEN_OCL_SADD_SAT_INT: case GEN_OCL_SADD_SAT_LONG: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.ADDSAT(getType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_UADD_SAT_CHAR: case GEN_OCL_UADD_SAT_SHORT: case GEN_OCL_UADD_SAT_INT: case GEN_OCL_UADD_SAT_LONG: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.ADDSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_SSUB_SAT_CHAR: case GEN_OCL_SSUB_SAT_SHORT: case GEN_OCL_SSUB_SAT_INT: case GEN_OCL_SSUB_SAT_LONG: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.SUBSAT(getType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_USUB_SAT_CHAR: case GEN_OCL_USUB_SAT_SHORT: case GEN_OCL_USUB_SAT_INT: case GEN_OCL_USUB_SAT_LONG: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_I64_MAD_SAT: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI 
!= AE); const ir::Register src2 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.I64MADSAT(getType(ctx, I.getType()), dst, src0, src1, src2); break; } case GEN_OCL_I64_MAD_SATU: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.I64MADSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1, src2); break; } case GEN_OCL_HADD: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.HADD(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_I64HADD: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*(AI++)); GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*(AI++)); const ir::Register dst = this->getRegister(&I); ctx.I64HADD(ir::TYPE_U64, dst, src0, src1); break; } case GEN_OCL_RHADD: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; const ir::Register dst = this->getRegister(&I); ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } case GEN_OCL_I64RHADD: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*(AI++)); GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*(AI++)); const ir::Register dst = this->getRegister(&I); ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1); break; } #define DEF(DST_TYPE, SRC_TYPE) \ { ctx.SAT_CVT(DST_TYPE, SRC_TYPE, getRegister(&I), getRegister(I.getOperand(0))); break; } case GEN_OCL_SAT_CONV_U8_TO_I8: DEF(ir::TYPE_S8, ir::TYPE_U8); case GEN_OCL_SAT_CONV_I16_TO_I8: DEF(ir::TYPE_S8, ir::TYPE_S16); case 
GEN_OCL_SAT_CONV_U16_TO_I8: DEF(ir::TYPE_S8, ir::TYPE_U16); case GEN_OCL_SAT_CONV_I32_TO_I8: DEF(ir::TYPE_S8, ir::TYPE_S32); case GEN_OCL_SAT_CONV_U32_TO_I8: DEF(ir::TYPE_S8, ir::TYPE_U32); case GEN_OCL_SAT_CONV_F32_TO_I8: DEF(ir::TYPE_S8, ir::TYPE_FLOAT); case GEN_OCL_SAT_CONV_I8_TO_U8: DEF(ir::TYPE_U8, ir::TYPE_S8); case GEN_OCL_SAT_CONV_I16_TO_U8: DEF(ir::TYPE_U8, ir::TYPE_S16); case GEN_OCL_SAT_CONV_U16_TO_U8: DEF(ir::TYPE_U8, ir::TYPE_U16); case GEN_OCL_SAT_CONV_I32_TO_U8: DEF(ir::TYPE_U8, ir::TYPE_S32); case GEN_OCL_SAT_CONV_U32_TO_U8: DEF(ir::TYPE_U8, ir::TYPE_U32); case GEN_OCL_SAT_CONV_F32_TO_U8: DEF(ir::TYPE_U8, ir::TYPE_FLOAT); case GEN_OCL_SAT_CONV_U16_TO_I16: DEF(ir::TYPE_S16, ir::TYPE_U16); case GEN_OCL_SAT_CONV_I32_TO_I16: DEF(ir::TYPE_S16, ir::TYPE_S32); case GEN_OCL_SAT_CONV_U32_TO_I16: DEF(ir::TYPE_S16, ir::TYPE_U32); case GEN_OCL_SAT_CONV_F32_TO_I16: DEF(ir::TYPE_S16, ir::TYPE_FLOAT); case GEN_OCL_SAT_CONV_I16_TO_U16: DEF(ir::TYPE_U16, ir::TYPE_S16); case GEN_OCL_SAT_CONV_I32_TO_U16: DEF(ir::TYPE_U16, ir::TYPE_S32); case GEN_OCL_SAT_CONV_U32_TO_U16: DEF(ir::TYPE_U16, ir::TYPE_U32); case GEN_OCL_SAT_CONV_F32_TO_U16: DEF(ir::TYPE_U16, ir::TYPE_FLOAT); case GEN_OCL_SAT_CONV_U32_TO_I32: DEF(ir::TYPE_S32, ir::TYPE_U32); case GEN_OCL_SAT_CONV_F32_TO_I32: DEF(ir::TYPE_S32, ir::TYPE_FLOAT); case GEN_OCL_SAT_CONV_I32_TO_U32: DEF(ir::TYPE_U32, ir::TYPE_S32); case GEN_OCL_SAT_CONV_F32_TO_U32: DEF(ir::TYPE_U32, ir::TYPE_FLOAT); #undef DEF default: break; } } } } void GenWriter::regAllocateAllocaInst(AllocaInst &I) { this->newRegister(&I); } void GenWriter::emitAllocaInst(AllocaInst &I) { Value *src = I.getOperand(0); Type *elemType = I.getType()->getElementType(); ir::ImmediateIndex immIndex; bool needMultiply = true; // Be aware, we manipulate pointers if (ctx.getPointerSize() == ir::POINTER_32_BITS) immIndex = ctx.newImmediate(uint32_t(getTypeByteSize(unit, elemType))); else immIndex = ctx.newImmediate(uint64_t(getTypeByteSize(unit, elemType))); // OK, we 
try to see if we know compile time the size we need to allocate if (I.isArrayAllocation() == false) // one element allocated only needMultiply = false; else { Constant *CPV = dyn_cast(src); if (CPV) { const uint64_t elemNum = processConstant(CPV, U64CPVExtractFunctor(ctx)); ir::Immediate imm = ctx.getImmediate(immIndex); imm.data.u64 = ALIGN(imm.data.u64 * elemNum, 4); ctx.setImmediate(immIndex, imm); needMultiply = false; } else { // Brutal but cheap way to get arrays aligned on 4 bytes: we just align // the element on 4 bytes! ir::Immediate imm = ctx.getImmediate(immIndex); imm.data.u64 = ALIGN(imm.data.u64, 4); ctx.setImmediate(immIndex, imm); } } // Now emit the stream of instructions to get the allocated pointer const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); const ir::Register dst = this->getRegister(&I); const ir::Register stack = ir::ocl::stackptr; const ir::Register reg = ctx.reg(pointerFamily); const ir::Immediate imm = ctx.getImmediate(immIndex); // Set the destination register properly ctx.MOV(imm.type, dst, stack); // Easy case, we just increment the stack pointer if (needMultiply == false) { ctx.LOADI(imm.type, reg, immIndex); ctx.ADD(imm.type, stack, stack, reg); } // Harder case (variable length array) that requires a multiply else { ctx.LOADI(imm.type, reg, immIndex); ctx.MUL(imm.type, reg, this->getRegister(src), reg); ctx.ADD(imm.type, stack, stack, reg); } } static INLINE Value *getLoadOrStoreValue(LoadInst &I) { return &I; } static INLINE Value *getLoadOrStoreValue(StoreInst &I) { return I.getValueOperand(); } void GenWriter::regAllocateLoadInst(LoadInst &I) { this->newRegister(&I); } void GenWriter::regAllocateStoreInst(StoreInst &I) {} extern int OCL_SIMD_WIDTH; template INLINE void GenWriter::emitLoadOrStore(T &I) { unsigned int llvmSpace = I.getPointerAddressSpace(); Value *llvmPtr = I.getPointerOperand(); Value *llvmValues = getLoadOrStoreValue(I); Type *llvmType = llvmValues->getType(); const bool dwAligned = 
(I.getAlignment() % 4) == 0; const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace); const ir::Register ptr = this->getRegister(llvmPtr); // Scalar is easy. We neednot build register tuples if (isScalarType(llvmType) == true) { const ir::Type type = getType(ctx, llvmType); const ir::Register values = this->getRegister(llvmValues); if (isLoad) ctx.LOAD(type, ptr, addrSpace, dwAligned, values); else ctx.STORE(type, ptr, addrSpace, dwAligned, values); } // A vector type requires to build a tuple else { VectorType *vectorType = cast(llvmType); Type *elemType = vectorType->getElementType(); // We follow OCL spec and support 2,3,4,8,16 elements only uint32_t elemNum = vectorType->getNumElements(); GBE_ASSERTM(elemNum == 2 || elemNum == 3 || elemNum == 4 || elemNum == 8 || elemNum == 16, "Only vectors of 2,3,4,8 or 16 elements are supported"); // Per OPenCL 1.2 spec 6.1.5: // For 3-component vector data types, the size of the data type is 4 * sizeof(component). // And the llvm does cast a type3 data to type4 for load/store instruction, // so a 4 elements vector may only have 3 valid elements. We need to fix it to correct element // count here. if (elemNum == 4 && regTranslator.isUndefConst(llvmValues, 3)) elemNum = 3; // The code is going to be fairly different from types to types (based on // size of each vector element) const ir::Type type = getType(ctx, elemType); const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); if ((type == ir::TYPE_FLOAT || type == ir::TYPE_U32 || type == ir::TYPE_S32) && addrSpace != ir::MEM_CONSTANT) { // One message is enough here. 
Nothing special to do if (elemNum <= 4) { // Build the tuple data in the vector vector tupleData; // put registers here for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { ir::Register reg; if(regTranslator.isUndefConst(llvmValues, elemID)) { Value *v = Constant::getNullValue(elemType); reg = this->getRegister(v); } else reg = this->getRegister(llvmValues, elemID); tupleData.push_back(reg); } const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum); // Emit the instruction if (isLoad) ctx.LOAD(type, tuple, ptr, addrSpace, elemNum, dwAligned); else ctx.STORE(type, tuple, ptr, addrSpace, elemNum, dwAligned); } // Not supported by the hardware. So, we split the message and we use // strided loads and stores else { // We simply use several uint4 loads const uint32_t msgNum = elemNum / 4; for (uint32_t msg = 0; msg < msgNum; ++msg) { // Build the tuple data in the vector vector tupleData; // put registers here for (uint32_t elemID = 0; elemID < 4; ++elemID) { ir::Register reg; if(regTranslator.isUndefConst(llvmValues, elemID)) { Value *v = Constant::getNullValue(elemType); reg = this->getRegister(v); } else reg = this->getRegister(llvmValues, 4*msg+elemID); tupleData.push_back(reg); } const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], 4); // We may need to update to offset the pointer ir::Register addr; if (msg == 0) addr = ptr; else { const ir::Register offset = ctx.reg(pointerFamily); ir::ImmediateIndex immIndex; ir::Type immType; if (pointerFamily == ir::FAMILY_DWORD) { immIndex = ctx.newImmediate(int32_t(msg*sizeof(uint32_t[4]))); immType = ir::TYPE_S32; } else { immIndex = ctx.newImmediate(int64_t(msg*sizeof(uint64_t[4]))); immType = ir::TYPE_S64; } addr = ctx.reg(pointerFamily); ctx.LOADI(immType, offset, immIndex); ctx.ADD(immType, addr, ptr, offset); } // Emit the instruction if (isLoad) ctx.LOAD(type, tuple, addr, addrSpace, 4, true); else ctx.STORE(type, tuple, addr, addrSpace, 4, true); } } } else { for (uint32_t elemID = 0; elemID < elemNum; 
elemID++) { if(regTranslator.isUndefConst(llvmValues, elemID)) continue; const ir::Register reg = this->getRegister(llvmValues, elemID); ir::Register addr; if (elemID == 0) addr = ptr; else { const ir::Register offset = ctx.reg(pointerFamily); ir::ImmediateIndex immIndex; int elemSize = getTypeByteSize(unit, elemType); immIndex = ctx.newImmediate(int32_t(elemID * elemSize)); addr = ctx.reg(pointerFamily); ctx.LOADI(ir::TYPE_S32, offset, immIndex); ctx.ADD(ir::TYPE_S32, addr, ptr, offset); } if (isLoad) ctx.LOAD(type, addr, addrSpace, dwAligned, reg); else ctx.STORE(type, addr, addrSpace, dwAligned, reg); } } } } void GenWriter::emitLoadInst(LoadInst &I) { this->emitLoadOrStore(I); } void GenWriter::emitStoreInst(StoreInst &I) { this->emitLoadOrStore(I); } llvm::FunctionPass *createGenPass(ir::Unit &unit) { return new GenWriter(unit); } } /* namespace gbe */ Release_v0.3/backend/src/llvm/llvm_gen_backend.hpp000066400000000000000000000052561223142177000223260ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file llvm_gen_backend.hpp * \author Benjamin Segovia * * Pass generation functions */ #ifndef __GBE_LLVM_GEN_BACKEND_HPP__ #define __GBE_LLVM_GEN_BACKEND_HPP__ #include "llvm/Pass.h" #include "sys/platform.hpp" #include "sys/map.hpp" #include "sys/hash_map.hpp" #include // LLVM Type namespace llvm { class Type; } namespace gbe { // Final target of the Gen backend namespace ir { class Unit; } /*! All intrinsic Gen functions */ enum OCLInstrinsic { #define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID, #include "llvm_gen_ocl_function.hxx" #undef DECL_LLVM_GEN_FUNCTION }; /*! Build the hash map for OCL functions on Gen */ struct OCLIntrinsicMap { /*! Build the intrinsic hash map */ OCLIntrinsicMap(void) { #define DECL_LLVM_GEN_FUNCTION(ID, NAME) \ map.insert(std::make_pair(#NAME, GEN_OCL_##ID)); #include "llvm_gen_ocl_function.hxx" #undef DECL_LLVM_GEN_FUNCTION } /*! Sort intrinsics with their names */ hash_map map; }; /*! Sort the OCL Gen instrinsic functions (built on pre-main) */ static const OCLIntrinsicMap instrinsicMap; /*! Pad the offset */ uint32_t getPadding(uint32_t offset, uint32_t align); /*! Get the type alignment in bytes */ uint32_t getAlignmentByte(const ir::Unit &unit, llvm::Type* Ty); /*! Get the type size in bits */ uint32_t getTypeBitSize(const ir::Unit &unit, llvm::Type* Ty); /*! Get the type size in bytes */ uint32_t getTypeByteSize(const ir::Unit &unit, llvm::Type* Ty); /*! whether this is a kernel function */ bool isKernelFunction(const llvm::Function &f); /*! Create a Gen-IR unit */ llvm::FunctionPass *createGenPass(ir::Unit &unit); /*! 
Remove the GEP instructions */ llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit); llvm::FunctionPass* createScalarizePass(); } /* namespace gbe */ #endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */ Release_v0.3/backend/src/llvm/llvm_gen_ocl_function.hxx000066400000000000000000000232241223142177000234340ustar00rootroot00000000000000DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID0, __gen_ocl_get_group_id0) DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID1, __gen_ocl_get_group_id1) DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID2, __gen_ocl_get_group_id2) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID0, __gen_ocl_get_local_id0) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID1, __gen_ocl_get_local_id1) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID2, __gen_ocl_get_local_id2) DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS0, __gen_ocl_get_num_groups0) DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS1, __gen_ocl_get_num_groups1) DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS2, __gen_ocl_get_num_groups2) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE0, __gen_ocl_get_local_size0) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE1, __gen_ocl_get_local_size1) DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE2, __gen_ocl_get_local_size2) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE0, __gen_ocl_get_global_size0) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE1, __gen_ocl_get_global_size1) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE2, __gen_ocl_get_global_size2) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET0, __gen_ocl_get_global_offset0) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET1, __gen_ocl_get_global_offset1) DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET2, __gen_ocl_get_global_offset2) DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim) // Math function DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs) DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos) DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin) DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt) DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt) DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log) DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow) DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp) DECL_LLVM_GEN_FUNCTION(RNDZ, 
__gen_ocl_rndz) DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde) DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu) DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd) // Barrier function DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local) DECL_LLVM_GEN_FUNCTION(GBARRIER, __gen_ocl_barrier_global) DECL_LLVM_GEN_FUNCTION(LGBARRIER, __gen_ocl_barrier_local_and_global) // To force SIMD8/16 compilation DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8) DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16) // To read_image functions. DECL_LLVM_GEN_FUNCTION(READ_IMAGE0, _Z21__gen_ocl_read_imageijjiij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE1, _Z21__gen_ocl_read_imageijjffj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE2, _Z22__gen_ocl_read_imageuijjiij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE3, _Z22__gen_ocl_read_imageuijjffj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE4, _Z21__gen_ocl_read_imagefjjiij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE5, _Z21__gen_ocl_read_imagefjjffj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE10, _Z21__gen_ocl_read_imageijjiiij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE11, _Z21__gen_ocl_read_imageijjfffj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE12, _Z22__gen_ocl_read_imageuijjiiij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE13, _Z22__gen_ocl_read_imageuijjfffj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE14, _Z21__gen_ocl_read_imagefjjiiij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE15, _Z21__gen_ocl_read_imagefjjfffj) // To write_image functions. 
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE0, _Z22__gen_ocl_write_imageijiiDv4_i) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE1, _Z22__gen_ocl_write_imageijffDv4_i) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE4, _Z22__gen_ocl_write_imagefjiiDv4_f) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE5, _Z22__gen_ocl_write_imagefjffDv4_f) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE2, _Z23__gen_ocl_write_imageuijiiDv4_j) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE3, _Z23__gen_ocl_write_imageuijffDv4_j) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE10, _Z22__gen_ocl_write_imageijiiiDv4_i) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE11, _Z22__gen_ocl_write_imageijfffDv4_i) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE12, _Z23__gen_ocl_write_imageuijiiiDv4_j) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE13, _Z23__gen_ocl_write_imageuijfffDv4_j) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE14, _Z22__gen_ocl_write_imagefjiiiDv4_f) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE15, _Z22__gen_ocl_write_imagefjfffDv4_f) // To get image info function DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width) DECL_LLVM_GEN_FUNCTION(GET_IMAGE_HEIGHT, __gen_ocl_get_image_height) DECL_LLVM_GEN_FUNCTION(GET_IMAGE_DEPTH, __gen_ocl_get_image_depth) DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_DATA_TYPE, __gen_ocl_get_image_channel_data_type) DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_ORDER, __gen_ocl_get_image_channel_order) // atomic related functions. 
DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD0, _Z20__gen_ocl_atomic_addPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD1, _Z20__gen_ocl_atomic_addPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB0, _Z20__gen_ocl_atomic_subPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB1, _Z20__gen_ocl_atomic_subPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_AND0, _Z20__gen_ocl_atomic_andPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_AND1, _Z20__gen_ocl_atomic_andPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_OR0, _Z19__gen_ocl_atomic_orPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_OR1, _Z19__gen_ocl_atomic_orPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR0, _Z20__gen_ocl_atomic_xorPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR1, _Z20__gen_ocl_atomic_xorPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN0, _Z21__gen_ocl_atomic_uminPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN1, _Z21__gen_ocl_atomic_uminPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX0, _Z21__gen_ocl_atomic_umaxPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX1, _Z21__gen_ocl_atomic_umaxPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN0, _Z21__gen_ocl_atomic_iminPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN1, _Z21__gen_ocl_atomic_iminPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX0, _Z21__gen_ocl_atomic_imaxPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX1, _Z21__gen_ocl_atomic_imaxPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG0, _Z21__gen_ocl_atomic_xchgPU3AS1jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG1, _Z21__gen_ocl_atomic_xchgPU3AS3jj) DECL_LLVM_GEN_FUNCTION(ATOMIC_INC0, _Z20__gen_ocl_atomic_incPU3AS1j) DECL_LLVM_GEN_FUNCTION(ATOMIC_INC1, _Z20__gen_ocl_atomic_incPU3AS3j) DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC0, _Z20__gen_ocl_atomic_decPU3AS1j) DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC1, _Z20__gen_ocl_atomic_decPU3AS3j) DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG0, _Z24__gen_ocl_atomic_cmpxchgPU3AS1jjj) DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG1, _Z24__gen_ocl_atomic_cmpxchgPU3AS3jjj) // saturation related functions. 
DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc) DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss) DECL_LLVM_GEN_FUNCTION(SADD_SAT_INT, _Z12ocl_sadd_satii) DECL_LLVM_GEN_FUNCTION(SADD_SAT_LONG, _Z12ocl_sadd_satll) DECL_LLVM_GEN_FUNCTION(UADD_SAT_CHAR, _Z12ocl_uadd_sathh) DECL_LLVM_GEN_FUNCTION(UADD_SAT_SHORT, _Z12ocl_uadd_sattt) DECL_LLVM_GEN_FUNCTION(UADD_SAT_INT, _Z12ocl_uadd_satjj) DECL_LLVM_GEN_FUNCTION(UADD_SAT_LONG, _Z12ocl_uadd_satmm) DECL_LLVM_GEN_FUNCTION(SSUB_SAT_CHAR, _Z12ocl_ssub_satcc) DECL_LLVM_GEN_FUNCTION(SSUB_SAT_SHORT, _Z12ocl_ssub_satss) DECL_LLVM_GEN_FUNCTION(SSUB_SAT_INT, _Z12ocl_ssub_satii) DECL_LLVM_GEN_FUNCTION(SSUB_SAT_LONG, _Z12ocl_ssub_satll) DECL_LLVM_GEN_FUNCTION(USUB_SAT_CHAR, _Z12ocl_usub_sathh) DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt) DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj) DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm) DECL_LLVM_GEN_FUNCTION(I64_MAD_SAT, _Z17__gen_ocl_mad_satlll) DECL_LLVM_GEN_FUNCTION(I64_MAD_SATU, _Z17__gen_ocl_mad_satmmm) // integer built-in functions DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii) DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj) DECL_LLVM_GEN_FUNCTION(MUL_HI_I64, _Z16__gen_ocl_mul_hill) DECL_LLVM_GEN_FUNCTION(MUL_HI_UI64, _Z16__gen_ocl_mul_himm) DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh) DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl) DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs) DECL_LLVM_GEN_FUNCTION(HADD, _Z14__gen_ocl_haddjj) DECL_LLVM_GEN_FUNCTION(RHADD, _Z15__gen_ocl_rhaddjj) DECL_LLVM_GEN_FUNCTION(I64HADD, _Z14__gen_ocl_haddmm) DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) // get sampler info DECL_LLVM_GEN_FUNCTION(GET_SAMPLER_INFO, __gen_ocl_get_sampler_info) // saturate convert 
DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8, _Z16convert_char_sath) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_I8, _Z16convert_char_sats) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I8, _Z16convert_char_satt) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I8, _Z16convert_char_sati) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I8, _Z16convert_char_satj) DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I8, _Z16convert_char_satf) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I8_TO_U8, _Z17convert_uchar_satc) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U8, _Z17convert_uchar_sats) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_U8, _Z17convert_uchar_satt) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U8, _Z17convert_uchar_sati) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U8, _Z17convert_uchar_satj) DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U8, _Z17convert_uchar_satf) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I16, _Z17convert_short_satt) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I16, _Z17convert_short_sati) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I16, _Z17convert_short_satj) DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I16, _Z17convert_short_satf) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U16, _Z18convert_ushort_sats) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U16, _Z18convert_ushort_sati) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U16, _Z18convert_ushort_satj) DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U16, _Z18convert_ushort_satf) DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I32, _Z15convert_int_satj) DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, _Z15convert_int_satf) DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U32, _Z16convert_uint_sati) DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) Release_v0.3/backend/src/llvm/llvm_passes.cpp000066400000000000000000000304221223142177000213700ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at 
your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia * Heldge RHodin */ /** * \file llvm_passes.cpp * \author Benjamin Segovia * \author Heldge RHodin */ /* THIS CODE IS DERIVED FROM GPL LLVM PTX BACKEND. CODE IS HERE: * http://sourceforge.net/scm/?type=git&group_id=319085 * Note that however, the original author, Heldge Rhodin, granted me (Benjamin * Segovia) the right to use another license for it (MIT here) */ #include "llvm/Config/config.h" #if LLVM_VERSION_MINOR <= 2 #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #else #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/IR/Instructions.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/Pass.h" #include "llvm/PassManager.h" #if LLVM_VERSION_MINOR <= 2 #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include "llvm/InlineAsm.h" #else #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/InlineAsm.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantsScanner.h" #include "llvm/Analysis/FindUsedTypes.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/Target/Mangler.h" #include "llvm/Transforms/Scalar.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include 
"llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #if !defined(LLVM_VERSION_MAJOR) || (LLVM_VERSION_MINOR == 1) #include "llvm/Target/TargetData.h" #elif LLVM_VERSION_MINOR == 2 #include "llvm/DataLayout.h" #else #include "llvm/IR/DataLayout.h" #endif #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR <= 2) #include "llvm/Support/InstVisitor.h" #else #include "llvm/InstVisitor.h" #endif #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Host.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Config/config.h" #include "llvm/llvm_gen_backend.hpp" #include "ir/unit.hpp" #include "sys/map.hpp" using namespace llvm; namespace gbe { bool isKernelFunction(const llvm::Function &F) { const Module *module = F.getParent(); const Module::NamedMDListType& globalMD = module->getNamedMDList(); bool bKernel = false; for(auto i = globalMD.begin(); i != globalMD.end(); i++) { const NamedMDNode &md = *i; if(strcmp(md.getName().data(), "opencl.kernels") != 0) continue; uint32_t ops = md.getNumOperands(); for(uint32_t x = 0; x < ops; x++) { MDNode* node = md.getOperand(x); Value * op = node->getOperand(0); if(op == &F) bKernel = true; } } return bKernel; } uint32_t getPadding(uint32_t offset, uint32_t align) { return (align - (offset % align)) % align; } uint32_t getAlignmentByte(const ir::Unit &unit, Type* Ty) { const uint32_t MAX_ALIGN = 8; //maximum size is 8 for doubles switch (Ty->getTypeID()) { case Type::VoidTyID: NOT_SUPPORTED; case Type::VectorTyID: { const VectorType* VecTy = cast(Ty); uint32_t elemNum = VecTy->getNumElements(); if (elemNum == 3) elemNum = 4; // OCL spec 
return elemNum * getTypeByteSize(unit, VecTy->getElementType()); } case Type::PointerTyID: case Type::IntegerTyID: case Type::FloatTyID: case Type::DoubleTyID: return getTypeBitSize(unit, Ty)/8; case Type::ArrayTyID: return getAlignmentByte(unit, cast(Ty)->getElementType()); case Type::StructTyID: { const StructType* StrTy = cast(Ty); uint32_t maxa = 0; for(uint32_t subtype = 0; subtype < StrTy->getNumElements(); subtype++) { maxa = std::max(getAlignmentByte(unit, StrTy->getElementType(subtype)), maxa); if(maxa==MAX_ALIGN) return maxa; } return maxa; } default: NOT_SUPPORTED; } return 0u; } uint32_t getTypeBitSize(const ir::Unit &unit, Type* Ty) { switch (Ty->getTypeID()) { case Type::VoidTyID: NOT_SUPPORTED; case Type::PointerTyID: return unit.getPointerSize(); case Type::IntegerTyID: return cast(Ty)->getBitWidth(); case Type::FloatTyID: return 32; case Type::DoubleTyID: return 64; case Type::VectorTyID: { const VectorType* VecTy = cast(Ty); return VecTy->getNumElements() * getTypeBitSize(unit, VecTy->getElementType()); } case Type::ArrayTyID: { const ArrayType* ArrTy = cast(Ty); Type* elementType = ArrTy->getElementType(); uint32_t size_element = getTypeBitSize(unit, elementType); uint32_t size = ArrTy->getNumElements() * size_element; uint32_t align = 8 * getAlignmentByte(unit, elementType); size += (ArrTy->getNumElements()-1) * getPadding(size_element, align); return size; } case Type::StructTyID: { const StructType* StrTy = cast(Ty); uint32_t size = 0; for(uint32_t subtype=0; subtype < StrTy->getNumElements(); subtype++) { Type* elementType = StrTy->getElementType(subtype); uint32_t align = 8 * getAlignmentByte(unit, elementType); size += getPadding(size, align); size += getTypeBitSize(unit, elementType); } return size; } default: NOT_SUPPORTED; } return 0u; } uint32_t getTypeByteSize(const ir::Unit &unit, Type* Ty) { uint32_t size_bit = getTypeBitSize(unit, Ty); assert((size_bit%8==0) && "no multiple of 8"); return size_bit/8; } class GenRemoveGEPPasss : 
public BasicBlockPass { public: static char ID; #define FORMER_VERSION 0 #if FORMER_VERSION GenRemoveGEPPasss(map& parentCompositePointer) : BasicBlockPass(ID), parentPointers(parentCompositePointer) {} map& parentPointers; #else GenRemoveGEPPasss(const ir::Unit &unit) : BasicBlockPass(ID), unit(unit) {} const ir::Unit &unit; #endif void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); } virtual const char *getPassName() const { return "PTX backend: insert special ptx instructions"; } bool simplifyGEPInstructions(GetElementPtrInst* GEPInst); virtual bool runOnBasicBlock(BasicBlock &BB) { bool changedBlock = false; iplist::iterator I = BB.getInstList().begin(); for (auto nextI = I, E = --BB.getInstList().end(); I != E; I = nextI) { iplist::iterator I = nextI++; if(GetElementPtrInst* gep = dyn_cast(&*I)) changedBlock = (simplifyGEPInstructions(gep) || changedBlock); } return changedBlock; } }; char GenRemoveGEPPasss::ID = 0; bool GenRemoveGEPPasss::simplifyGEPInstructions(GetElementPtrInst* GEPInst) { const uint32_t ptrSize = unit.getPointerSize(); Value* parentPointer = GEPInst->getOperand(0); #if FORMER_VERSION Value* topParent = parentPointer; #endif CompositeType* CompTy = cast(parentPointer->getType()); Value* currentAddrInst = new PtrToIntInst(parentPointer, IntegerType::get(GEPInst->getContext(), ptrSize), "", GEPInst); uint32_t constantOffset = 0; for(uint32_t op=1; opgetNumOperands(); ++op) { uint32_t TypeIndex; //we have a constant struct/array acces if(ConstantInt* ConstOP = dyn_cast(GEPInst->getOperand(op))) { uint32_t offset = 0; TypeIndex = ConstOP->getZExtValue(); for(uint32_t ty_i=0; ty_igetTypeAtIndex(ty_i); uint32_t align = getAlignmentByte(unit, elementType); offset += getPadding(offset, align); offset += getTypeByteSize(unit, elementType); } //add getPaddingding for accessed type const uint32_t align = getAlignmentByte(unit, CompTy->getTypeAtIndex(TypeIndex)); offset += getPadding(offset, align); constantOffset += offset; } // 
none constant index (=> only array/verctor allowed) else { // we only have array/vectors here, // therefore all elements have the same size TypeIndex = 0; Type* elementType = CompTy->getTypeAtIndex(TypeIndex); uint32_t size = getTypeByteSize(unit, elementType); //add padding uint32_t align = getAlignmentByte(unit, elementType); size += getPadding(size, align); Constant* newConstSize = ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), size); Value *operand = GEPInst->getOperand(op); //HACK TODO: Inserted by type replacement.. this code could break something???? if(getTypeByteSize(unit, operand->getType())>4) { GBE_ASSERTM(false, "CHECK IT"); operand->dump(); //previous instruction is sext or zext instr. ignore it CastInst *cast = dyn_cast(operand); if(cast && (isa(operand) || isa(operand))) { //hope that CastInst is a s/zext operand = cast->getOperand(0); } else { //trunctate operand = new TruncInst(operand, IntegerType::get(GEPInst->getContext(), ptrSize), "", GEPInst); } } BinaryOperator* tmpMul = BinaryOperator::Create(Instruction::Mul, newConstSize, operand, "", GEPInst); currentAddrInst = BinaryOperator::Create(Instruction::Add, currentAddrInst, tmpMul, "", GEPInst); } //step down in type hirachy CompTy = dyn_cast(CompTy->getTypeAtIndex(TypeIndex)); } //insert addition of new offset before GEPInst Constant* newConstOffset = ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), constantOffset); currentAddrInst = BinaryOperator::Create(Instruction::Add, currentAddrInst, newConstOffset, "", GEPInst); //convert offset to ptr type (nop) IntToPtrInst* intToPtrInst = new IntToPtrInst(currentAddrInst,GEPInst->getType(),"", GEPInst); //replace uses of the GEP instruction with the newly calculated pointer GEPInst->replaceAllUsesWith(intToPtrInst); GEPInst->dropAllReferences(); GEPInst->removeFromParent(); #if FORMER_VERSION //insert new pointer into parent list while(parentPointers.find(topParent)!=parentPointers.end()) topParent = 
parentPointers.find(topParent)->second; parentPointers[intToPtrInst] = topParent; #endif return true; } BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit) { return new GenRemoveGEPPasss(unit); } } /* namespace gbe */ Release_v0.3/backend/src/llvm/llvm_scalarize.cpp000066400000000000000000000654261223142177000220630ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ /** * \file llvm_scalarize.cpp * \author Yang Rong * * This file is derived from: * https://code.google.com/p/lunarglass/source/browse/trunk/Core/Passes/Transforms/Scalarize.cpp?r=605 */ //===- Scalarize.cpp - Scalarize LunarGLASS IR ----------------------------===// // // LunarGLASS: An Open Modular Shader Compiler Architecture // Copyright (C) 2010-2011 LunarG, Inc. // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; version 2 of the // License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA // 02110-1301, USA. // //===----------------------------------------------------------------------===// // // Author: Michael Ilseman, LunarG // //===----------------------------------------------------------------------===// // // Scalarize the IR. // * Loads of uniforms become multiple loadComponent calls // // * Reads/writes become read/writeComponent calls // // * Component-wise operations become multiple ops over each component // // * Texture call become recomponsed texture calls // // * Vector ops disappear, with their users referring to the scalarized // * components // //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #if LLVM_VERSION_MINOR <= 2 #include "llvm/Function.h" #include "llvm/InstrTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #else #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/Pass.h" #if LLVM_VERSION_MINOR <= 1 #include "llvm/Support/IRBuilder.h" #elif LLVM_VERSION_MINOR == 2 #include "llvm/IRBuilder.h" #else #include "llvm/IR/IRBuilder.h" #endif /* LLVM_VERSION_MINOR <= 1 */ #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/raw_ostream.h" #include "llvm/llvm_gen_backend.hpp" #include "sys/map.hpp" using namespace llvm; namespace gbe { struct VectorValues { VectorValues() : vals() { } void setComponent(int c, llvm::Value* val) { assert(c >= 0 && c < 16 && "Out of bounds component"); vals[c] = val; } llvm::Value* getComponent(int c) { assert(c >= 0 && c < 16 && "Out of 
bounds component"); assert(vals[c] && "Requesting non-existing component"); return vals[c]; } // {Value* x, Value* y, Value* z, Value* w} llvm::Value* vals[16]; }; class Scalarize : public FunctionPass { public: // Standard pass stuff static char ID; Scalarize() : FunctionPass(ID) { initializeLoopInfoPass(*PassRegistry::getPassRegistry()); initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } virtual bool runOnFunction(Function&); void print(raw_ostream&, const Module* = 0) const; virtual void getAnalysisUsage(AnalysisUsage&) const; protected: // An instruction is valid post-scalarization iff it is fully scalar or it // is a gla_loadn bool isValid(const Instruction*); // Take an instruction that produces a vector, and scalarize it bool scalarize(Instruction*); bool scalarizePerComponent(Instruction*); bool scalarizeFuncCall(CallInst *); bool scalarizeLoad(LoadInst*); bool scalarizeStore(StoreInst*); //bool scalarizeIntrinsic(IntrinsicInst*); bool scalarizeExtract(ExtractElementInst*); bool scalarizeInsert(InsertElementInst*); bool scalarizeShuffleVector(ShuffleVectorInst*); bool scalarizePHI(PHINode*); void scalarizeArgs(Function& F); // ... // Helpers to make the actual multiple scalar calls, one per // component. Updates the given VectorValues's components with the new // Values. void makeScalarizedCalls(Function*, ArrayRef, int numComponents, VectorValues&); void makePerComponentScalarizedCalls(Instruction*, ArrayRef); // Makes a scalar form of the given instruction: replaces the operands // and chooses a correct return type Instruction* createScalarInstruction(Instruction* inst, ArrayRef); // Gather the specified components in the given values. Returns the // component if the given value is a vector, or the scalar itself. void gatherComponents(int component, ArrayRef args, SmallVectorImpl& componentArgs); // Get the assigned component for that value. If the value is a scalar, // returns the scalar. If it's a constant, returns that component. 
If // it's an instruction, returns the vectorValues of that instruction for // that component Value* getComponent(int component, Value*); // Used for assertion purposes. Whether we can get the component out with // a getComponent call bool canGetComponent(Value*); // Used for assertion purposes. Whether for every operand we can get // components with a getComponent call bool canGetComponentArgs(User*); // Delete the instruction in the deadList void dce(); int GetConstantInt(const Value* value); bool IsPerComponentOp(const Instruction* inst); bool IsPerComponentOp(const Value* value); //these function used to add extract and insert instructions when load/store etc. void extractFromVector(Value* insn); Value* InsertToVector(Value* insn, Value* vecValue); Type* GetBasicType(Value* value) { return GetBasicType(value->getType()); } Type* GetBasicType(Type* type) { switch(type->getTypeID()) { case Type::VectorTyID: case Type::ArrayTyID: return GetBasicType(type->getContainedType(0)); default: break; } return type; } int GetComponentCount(const Type* type) { if (type->getTypeID() == Type::VectorTyID) return llvm::dyn_cast(type)->getNumElements(); else return 1; } int GetComponentCount(const Value* value) { return GetComponentCount(value->getType()); } DenseMap vectorVals; Module* module; IRBuilder<>* builder; Type* intTy; Type* floatTy; std::vector deadList; // List of vector phis that were not completely scalarized because some // of their operands hadn't before been visited (i.e. 
loop variant // variables) SmallVector incompletePhis; }; Value* Scalarize::getComponent(int component, Value* v) { assert(canGetComponent(v) && "getComponent called on unhandled vector"); if (v->getType()->isVectorTy()) { if (ConstantDataVector* c = dyn_cast(v)) { return c->getElementAsConstant(component); } else if (ConstantVector* c = dyn_cast(v)) { return c->getOperand(component); } else if (isa(v)) { return Constant::getNullValue(GetBasicType(v)); } else if (isa(v)) { return UndefValue::get(GetBasicType(v)); } else { return vectorVals[v].getComponent(component); } } else { return v; } } bool IsPerComponentOp(const llvm::Value* value) { const llvm::Instruction* inst = llvm::dyn_cast(value); return inst && IsPerComponentOp(inst); } bool Scalarize::IsPerComponentOp(const Instruction* inst) { //if (const IntrinsicInst* intr = dyn_cast(inst)) // return IsPerComponentOp(intr); if (inst->isTerminator()) return false; switch (inst->getOpcode()) { // Cast ops are only per-component if they cast back to the same vector // width case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: return GetComponentCount(inst->getOperand(0)) == GetComponentCount(inst); // Vector ops case Instruction::InsertElement: case Instruction::ExtractElement: case Instruction::ShuffleVector: // Ways of accessing/loading/storing vectors case Instruction::ExtractValue: case Instruction::InsertValue: // Memory ops case Instruction::Alloca: case Instruction::Load: case Instruction::Store: case Instruction::GetElementPtr: // Phis are a little special. We consider them not to be per-component // because the mechanism of choice is a single value (what path we took to // get here), and doesn't choose per-component (as select would). 
The caller // should know to handle phis specially case Instruction::PHI: // Call insts, conservatively are no per-component case Instruction::Call: // Misc case Instruction::LandingPad: //--- 3.0 case Instruction::VAArg: return false; } // end of switch (inst->getOpcode()) return true; } int Scalarize::GetConstantInt(const Value* value) { const ConstantInt *constantInt = dyn_cast(value); // this might still be a constant expression, rather than a numeric constant, // e.g., expression with undef's in it, so it was not folded if (! constantInt) NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("non-simple constant"); return constantInt->getValue().getSExtValue(); } bool Scalarize::canGetComponent(Value* v) { if (v->getType()->isVectorTy()) { if (isa(v) || isa(v) || isa(v) || isa(v)) { return true; } else { assert((isa(v) || isa(v)) && "Non-constant non-instuction?"); return vectorVals.count(v); } } else { return true; } } bool Scalarize::canGetComponentArgs(User* u) { if (PHINode* phi = dyn_cast(u)) { for (unsigned int i = 0; i < phi->getNumIncomingValues(); ++i) if (!canGetComponent(phi->getIncomingValue(i))) return false; } else { for (User::op_iterator i = u->op_begin(), e = u->op_end(); i != e; ++i) if (!canGetComponent(*i)) return false; } return true; } void Scalarize::gatherComponents(int component, ArrayRef args, SmallVectorImpl& componentArgs) { componentArgs.clear(); for (ArrayRef::iterator i = args.begin(), e = args.end(); i != e; ++i) componentArgs.push_back(getComponent(component, *i)); } Instruction* Scalarize::createScalarInstruction(Instruction* inst, ArrayRef args) { // TODO: Refine the below into one large switch unsigned op = inst->getOpcode(); if (inst->isCast()) { assert(args.size() == 1 && "incorrect number of arguments for cast op"); return CastInst::Create((Instruction::CastOps)op, args[0], GetBasicType(inst)); } if (inst->isBinaryOp()) { assert(args.size() == 2 && "incorrect number of arguments for binary op"); return 
BinaryOperator::Create((Instruction::BinaryOps)op, args[0], args[1]); } if (PHINode* phi = dyn_cast(inst)) { PHINode* res = PHINode::Create(GetBasicType(inst), phi->getNumIncomingValues()); // Loop over pairs of operands: [Value*, BasicBlock*] for (unsigned int i = 0; i < args.size(); i++) { BasicBlock* bb = phi->getIncomingBlock(i); //dyn_cast(args[i+1]); //assert(bb && "Non-basic block incoming block?"); res->addIncoming(args[i], bb); } return res; } if (CmpInst* cmpInst = dyn_cast(inst)) { assert(args.size() == 2 && "incorrect number of arguments for comparison"); return CmpInst::Create(cmpInst->getOpcode(), cmpInst->getPredicate(), args[0], args[1]); } if (isa(inst)) { assert(args.size() == 3 && "incorrect number of arguments for select"); return SelectInst::Create(args[0], args[1], args[2]); } if (IntrinsicInst* intr = dyn_cast(inst)) { if (! IsPerComponentOp(inst)) NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarize instruction on a non-per-component intrinsic"); // TODO: Assumption is that all per-component intrinsics have all their // arguments be overloadable. Need to find some way to assert on this // assumption. This is due to how getDeclaration operates; it only takes // a list of types that fit overloadable slots. 
SmallVector tys(1, GetBasicType(inst->getType())); // Call instructions have the decl as a last argument, so skip it for (ArrayRef::iterator i = args.begin(), e = args.end() - 1; i != e; ++i) { tys.push_back(GetBasicType((*i)->getType())); } Function* f = Intrinsic::getDeclaration(module, intr->getIntrinsicID(), tys); return CallInst::Create(f, args); } NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unsupported instruction: ", inst->getOpcode(), // inst->getOpcodeName()); return 0; } void Scalarize::makeScalarizedCalls(Function* f, ArrayRef args, int count, VectorValues& vVals) { assert(count > 0 && count <= 16 && "invalid number of vector components"); for (int i = 0; i < count; ++i) { Value* res; SmallVector callArgs(args.begin(), args.end()); callArgs.push_back(ConstantInt::get(intTy, i)); res = builder->CreateCall(f, callArgs); vVals.setComponent(i, res); } } void Scalarize::makePerComponentScalarizedCalls(Instruction* inst, ArrayRef args) { int count = GetComponentCount(inst); assert(count > 0 && count <= 16 && "invalid number of vector components"); assert((inst->getNumOperands() == args.size() || isa(inst)) && "not enough arguments passed for instruction"); VectorValues& vVals = vectorVals[inst]; for (int i = 0; i < count; ++i) { // Set this component of each arg SmallVector callArgs(args.size(), 0); gatherComponents(i, args, callArgs); Instruction* res = createScalarInstruction(inst, callArgs); vVals.setComponent(i, res); builder->Insert(res); } } bool Scalarize::isValid(const Instruction* inst) { // The result if (inst->getType()->isVectorTy()) return false; // The arguments for (Instruction::const_op_iterator i = inst->op_begin(), e = inst->op_end(); i != e; ++i) { const Value* v = (*i); assert(v); if (v->getType()->isVectorTy()) return false; } return true; } bool Scalarize::scalarize(Instruction* inst) { if (isValid(inst)) return false; assert(! 
vectorVals.count(inst) && "We've already scalarized this somehow?"); assert((canGetComponentArgs(inst) || isa(inst)) && "Scalarizing an op whose arguments haven't been scalarized "); builder->SetInsertPoint(inst); if (IsPerComponentOp(inst)) return scalarizePerComponent(inst); if (LoadInst* ld = dyn_cast(inst)) return scalarizeLoad(ld); if (CallInst* call = dyn_cast(inst)) return scalarizeFuncCall(call); if (ExtractElementInst* extr = dyn_cast(inst)) return scalarizeExtract(extr); if (InsertElementInst* ins = dyn_cast(inst)) return scalarizeInsert(ins); if (ShuffleVectorInst* sv = dyn_cast(inst)) return scalarizeShuffleVector(sv); if (PHINode* phi = dyn_cast(inst)) return scalarizePHI(phi); if (isa(inst) || isa(inst)) // TODO: need to come up with a struct/array model for scalarization NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarizing struct/array ops"); if (StoreInst* st = dyn_cast(inst)) return scalarizeStore(st); NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unhandled instruction ", inst->getOpcode(), inst->getOpcodeName()); return false; } bool Scalarize::scalarizeShuffleVector(ShuffleVectorInst* sv) { // %res = shuffleVector %foo, bar, <...> // ==> nothing (just make a new VectorValues with the new components) VectorValues& vVals = vectorVals[sv]; int size = GetComponentCount(sv); int srcSize = GetComponentCount(sv->getOperand(0)->getType()); for (int i = 0; i < size; ++i) { int select = sv->getMaskValue(i); if (select < 0) { vVals.setComponent(i, UndefValue::get(GetBasicType(sv->getOperand(0)))); continue; } // Otherwise look up the corresponding component from the correct // source. 
Value* selectee; if (select < srcSize) { selectee = sv->getOperand(0); } else { // Choose from the second operand select -= srcSize; selectee = sv->getOperand(1); } vVals.setComponent(i, getComponent(select, selectee)); } return true; } bool Scalarize::scalarizePerComponent(Instruction* inst) { // dst = op %foo, %bar // ==> dstx = op ty %foox, ty %barx // dsty = op ty %fooy, ty %bary // ... SmallVector args(inst->op_begin(), inst->op_end()); makePerComponentScalarizedCalls(inst, args); return true; } bool Scalarize::scalarizePHI(PHINode* phi) { // dst = phi [ %foo, %bb1 ], [ %bar, %bb2], ... // ==> dstx = phi ty [ %foox, %bb1 ], [ %barx, %bb2], ... // dsty = phi ty [ %fooy, %bb1 ], [ %bary, %bb2], ... // If the scalar values are all known up-front, then just make the full // phinode now. If they are not yet known (phinode for a loop variant // variable), then deferr the arguments until later if (canGetComponentArgs(phi)) { SmallVector args(phi->op_begin(), phi->op_end()); makePerComponentScalarizedCalls(phi, args); } else { makePerComponentScalarizedCalls(phi, ArrayRef()); incompletePhis.push_back(phi); } return true; } void Scalarize::extractFromVector(Value* insn) { VectorValues& vVals = vectorVals[insn]; for (int i = 0; i < GetComponentCount(insn); ++i) { Value *cv = ConstantInt::get(intTy, i); Value *EI = builder->CreateExtractElement(insn, cv); vVals.setComponent(i, EI); } } Value* Scalarize::InsertToVector(Value * insn, Value* vecValue) { //VectorValues& vVals = vectorVals[writeValue]; //add fake insert instructions to avoid removed Value *II = NULL; for (int i = 0; i < GetComponentCount(vecValue); ++i) { Value *vec = II ? 
II : UndefValue::get(vecValue->getType()); Value *cv = ConstantInt::get(intTy, i); II = builder->CreateInsertElement(vec, getComponent(i, vecValue), cv); } return II; } bool Scalarize::scalarizeFuncCall(CallInst* call) { if (Function *F = call->getCalledFunction()) { if (F->getIntrinsicID() != 0) { //Intrinsic functions NOT_IMPLEMENTED; } else { Value *Callee = call->getCalledValue(); const std::string fnName = Callee->getName(); auto it = instrinsicMap.map.find(fnName); GBE_ASSERT(it != instrinsicMap.map.end()); // Get the function arguments CallSite CS(call); CallSite::arg_iterator CI = CS.arg_begin() + 3; switch (it->second) { default: break; case GEN_OCL_READ_IMAGE0: case GEN_OCL_READ_IMAGE1: case GEN_OCL_READ_IMAGE2: case GEN_OCL_READ_IMAGE3: case GEN_OCL_READ_IMAGE4: case GEN_OCL_READ_IMAGE5: case GEN_OCL_READ_IMAGE10: case GEN_OCL_READ_IMAGE11: case GEN_OCL_READ_IMAGE12: case GEN_OCL_READ_IMAGE13: case GEN_OCL_READ_IMAGE14: case GEN_OCL_READ_IMAGE15: case GEN_OCL_GET_IMAGE_WIDTH: case GEN_OCL_GET_IMAGE_HEIGHT: { extractFromVector(call); break; } case GEN_OCL_WRITE_IMAGE10: case GEN_OCL_WRITE_IMAGE11: case GEN_OCL_WRITE_IMAGE12: case GEN_OCL_WRITE_IMAGE13: case GEN_OCL_WRITE_IMAGE14: case GEN_OCL_WRITE_IMAGE15: CI++; case GEN_OCL_WRITE_IMAGE0: case GEN_OCL_WRITE_IMAGE1: case GEN_OCL_WRITE_IMAGE2: case GEN_OCL_WRITE_IMAGE3: case GEN_OCL_WRITE_IMAGE4: case GEN_OCL_WRITE_IMAGE5: { *CI = InsertToVector(call, *CI); break; } } } } return false; } bool Scalarize::scalarizeLoad(LoadInst* ld) { extractFromVector(ld); return false; } bool Scalarize::scalarizeStore(StoreInst* st) { st->setOperand(0, InsertToVector(st, st->getValueOperand())); return false; } bool Scalarize::scalarizeExtract(ExtractElementInst* extr) { // %res = extractelement %foo, %i // ==> nothing (just use %foo's %ith component instead of %res) if (! isa(extr->getOperand(1))) { // TODO: Variably referenced components. Probably handle/emulate through // a series of selects. 
NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components"); } //if (isa(extr->getOperand(0))) // return false; int component = GetConstantInt(extr->getOperand(1)); Value* v = getComponent(component, extr->getOperand(0)); if(extr == v) return false; extr->replaceAllUsesWith(v); return true; } bool Scalarize::scalarizeInsert(InsertElementInst* ins) { // %res = insertValue %foo, %i // ==> nothing (just make a new VectorValues with the new component) if (! isa(ins->getOperand(2))) { // TODO: Variably referenced components. Probably handle/emulate through // a series of selects. NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components"); } int component = GetConstantInt(ins->getOperand(2)); VectorValues& vVals = vectorVals[ins]; for (int i = 0; i < GetComponentCount(ins); ++i) { vVals.setComponent(i, i == component ? ins->getOperand(1) : getComponent(i, ins->getOperand(0))); } return true; } void Scalarize::scalarizeArgs(Function& F) { if (F.arg_empty()) return; ReversePostOrderTraversal rpot(&F); BasicBlock::iterator instI = (*rpot.begin())->begin(); builder->SetInsertPoint(instI); Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); for (; I != E; ++I) { Type *type = I->getType(); if(type->isVectorTy()) extractFromVector(I); } return; } bool Scalarize::runOnFunction(Function& F) { switch (F.getCallingConv()) { #if LLVM_VERSION_MINOR <= 2 case CallingConv::PTX_Device: return false; case CallingConv::PTX_Kernel: #else case CallingConv::C: #endif break; default: GBE_ASSERTM(false, "Unsupported calling convention"); } // As we inline all function calls, so skip non-kernel functions bool bKernel = isKernelFunction(F); if(!bKernel) return false; bool changed = false; module = F.getParent(); intTy = IntegerType::get(module->getContext(), 32); floatTy = Type::getFloatTy(module->getContext()); builder = new IRBuilder<>(module->getContext()); scalarizeArgs(F); typedef ReversePostOrderTraversal RPOTType; 
RPOTType rpot(&F); for (RPOTType::rpo_iterator bbI = rpot.begin(), bbE = rpot.end(); bbI != bbE; ++bbI) { for (BasicBlock::iterator instI = (*bbI)->begin(), instE = (*bbI)->end(); instI != instE; ++instI) { bool scalarized = scalarize(instI); if (scalarized) { changed = true; // TODO: uncomment when done deadList.push_back(instI); } } } // Fill in the incomplete phis for (SmallVectorImpl::iterator phiI = incompletePhis.begin(), phiE = incompletePhis.end(); phiI != phiE; ++phiI) { assert(canGetComponentArgs(*phiI) && "Phi's operands never scalarized"); // Fill in each component of this phi VectorValues& vVals = vectorVals[*phiI]; for (int c = 0; c < GetComponentCount(*phiI); ++c) { PHINode* compPhi = dyn_cast(vVals.getComponent(c)); assert(compPhi && "Vector phi got scalarized to non-phis?"); // Loop over pairs of operands: [Value*, BasicBlock*] for (unsigned int i = 0; i < (*phiI)->getNumOperands(); i++) { BasicBlock* bb = (*phiI)->getIncomingBlock(i); assert(bb && "Non-basic block incoming block?"); compPhi->addIncoming(getComponent(c, (*phiI)->getOperand(i)), bb); } } } dce(); incompletePhis.clear(); vectorVals.clear(); delete builder; builder = 0; return changed; } void Scalarize::dce() { //two passes delete for some phinode for (std::vector::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) { (*i)->dropAllReferences(); if((*i)->use_empty()) { (*i)->eraseFromParent(); (*i) = NULL; } } for (std::vector::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) { if((*i) && (*i)->getParent()) (*i)->eraseFromParent(); } deadList.clear(); } void Scalarize::getAnalysisUsage(AnalysisUsage& AU) const { } void Scalarize::print(raw_ostream&, const Module*) const { return; } FunctionPass* createScalarizePass() { return new Scalarize(); } char Scalarize::ID = 0; } // end namespace Release_v0.3/backend/src/llvm/llvm_to_gen.cpp000066400000000000000000000062421223142177000213500ustar00rootroot00000000000000/* * Copyright © 2012 Intel 
Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file llvm_to_gen.cpp * \author Benjamin Segovia */ #include "llvm/Config/config.h" #if LLVM_VERSION_MINOR <= 2 #include "llvm/LLVMContext.h" #include "llvm/Module.h" #else #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/PassManager.h" #include "llvm/Pass.h" #if LLVM_VERSION_MINOR <= 2 #include "llvm/Support/IRReader.h" #else #include "llvm/IRReader/IRReader.h" #include "llvm/Support/SourceMgr.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/llvm_gen_backend.hpp" #include "llvm/llvm_to_gen.hpp" #include "sys/cvar.hpp" #include "sys/platform.hpp" #include #include #include #include
namespace gbe
{
  // Run-time switches: dump the LLVM module after / before the extra passes.
  BVAR(OCL_OUTPUT_LLVM, false);
  BVAR(OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS, false);

  // Parses the LLVM IR file `fileName` and runs the lowering pipeline on it,
  // ending with the Gen pass that is handed `unit`.
  // Returns false when the file cannot be parsed, true otherwise.
  bool llvmToGen(ir::Unit &unit, const char *fileName)
  {
    using namespace llvm;
    // Get the global LLVM context
    llvm::LLVMContext& c = llvm::getGlobalContext();

    // Stream on stdout, created only when one of the dump switches is on.
    // The second constructor argument (shouldClose) is false, so stdout is
    // left open when the stream is destroyed.
    std::unique_ptr<llvm::raw_fd_ostream> o;
    if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS || OCL_OUTPUT_LLVM)
      o.reset(new llvm::raw_fd_ostream(fileno(stdout), false));

    // Get the module from its file
    llvm::SMDiagnostic Err;
    std::unique_ptr<Module> M(ParseIRFile(fileName, Err, c));
    if (M.get() == 0) return false;
    Module &mod = *M;

    llvm::PassManager passes;

    // Print the code before further optimizations
    if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS)
      passes.add(createPrintModulePass(&*o));
    passes.add(createScalarizePass());             // Expand all vector ops
    passes.add(createScalarReplAggregatesPass());  // Break up allocas
    passes.add(createRemoveGEPPass(unit));
    passes.add(createConstantPropagationPass());
    passes.add(createDeadInstEliminationPass());   // Remove simplified instructions
    passes.add(createLowerSwitchPass());
    passes.add(createPromoteMemoryToRegisterPass());
    passes.add(createGVNPass());                   // Remove redundancies
    passes.add(createGenPass(unit));

    // Print the code after the extra optimization passes
    if (OCL_OUTPUT_LLVM)
      passes.add(createPrintModulePass(&*o));
    passes.run(mod);

    return true;
  }
} /* namespace gbe */
Release_v0.3/backend/src/llvm/llvm_to_gen.hpp000066400000000000000000000023111223142177000213460ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file llvm_to_gen.hpp * \author Benjamin Segovia */ #ifndef __GBE_IR_LLVM_TO_GEN_HPP__ #define __GBE_IR_LLVM_TO_GEN_HPP__ namespace gbe { namespace ir { // The code is output into an IR unit class Unit; } /* namespace ir */ /*! 
Convert the LLVM IR code to a GEN IR code */ bool llvmToGen(ir::Unit &unit, const char *fileName); } /* namespace gbe */ #endif /* __GBE_IR_LLVM_TO_GEN_HPP__ */ Release_v0.3/backend/src/ocl_as.h000066400000000000000000001173331223142177000170020ustar00rootroot00000000000000// This file is autogenerated by gen_as.sh. // Don't modify it manually. union _type_cast_1_b { char _char; uchar _uchar; }; INLINE OVERLOADABLE uchar as_uchar(char v) { union _type_cast_1_b u; u._char = v; return u._uchar; } INLINE OVERLOADABLE char as_char(uchar v) { union _type_cast_1_b u; u._uchar = v; return u._char; } union _type_cast_2_b { short _short; ushort _ushort; char2 _char2; uchar2 _uchar2; }; INLINE OVERLOADABLE ushort as_ushort(short v) { union _type_cast_2_b u; u._short = v; return u._ushort; } INLINE OVERLOADABLE char2 as_char2(short v) { union _type_cast_2_b u; u._short = v; return u._char2; } INLINE OVERLOADABLE uchar2 as_uchar2(short v) { union _type_cast_2_b u; u._short = v; return u._uchar2; } INLINE OVERLOADABLE short as_short(ushort v) { union _type_cast_2_b u; u._ushort = v; return u._short; } INLINE OVERLOADABLE char2 as_char2(ushort v) { union _type_cast_2_b u; u._ushort = v; return u._char2; } INLINE OVERLOADABLE uchar2 as_uchar2(ushort v) { union _type_cast_2_b u; u._ushort = v; return u._uchar2; } INLINE OVERLOADABLE short as_short(char2 v) { union _type_cast_2_b u; u._char2 = v; return u._short; } INLINE OVERLOADABLE ushort as_ushort(char2 v) { union _type_cast_2_b u; u._char2 = v; return u._ushort; } INLINE OVERLOADABLE uchar2 as_uchar2(char2 v) { union _type_cast_2_b u; u._char2 = v; return u._uchar2; } INLINE OVERLOADABLE short as_short(uchar2 v) { union _type_cast_2_b u; u._uchar2 = v; return u._short; } INLINE OVERLOADABLE ushort as_ushort(uchar2 v) { union _type_cast_2_b u; u._uchar2 = v; return u._ushort; } INLINE OVERLOADABLE char2 as_char2(uchar2 v) { union _type_cast_2_b u; u._uchar2 = v; return u._char2; } union _type_cast_3_b { char3 _char3; uchar3 
_uchar3; }; INLINE OVERLOADABLE uchar3 as_uchar3(char3 v) { union _type_cast_3_b u; u._char3 = v; return u._uchar3; } INLINE OVERLOADABLE char3 as_char3(uchar3 v) { union _type_cast_3_b u; u._uchar3 = v; return u._char3; } union _type_cast_4_b { int _int; uint _uint; short2 _short2; ushort2 _ushort2; char4 _char4; uchar4 _uchar4; float _float; }; INLINE OVERLOADABLE uint as_uint(int v) { union _type_cast_4_b u; u._int = v; return u._uint; } INLINE OVERLOADABLE short2 as_short2(int v) { union _type_cast_4_b u; u._int = v; return u._short2; } INLINE OVERLOADABLE ushort2 as_ushort2(int v) { union _type_cast_4_b u; u._int = v; return u._ushort2; } INLINE OVERLOADABLE char4 as_char4(int v) { union _type_cast_4_b u; u._int = v; return u._char4; } INLINE OVERLOADABLE uchar4 as_uchar4(int v) { union _type_cast_4_b u; u._int = v; return u._uchar4; } INLINE OVERLOADABLE float as_float(int v) { union _type_cast_4_b u; u._int = v; return u._float; } INLINE OVERLOADABLE int as_int(uint v) { union _type_cast_4_b u; u._uint = v; return u._int; } INLINE OVERLOADABLE short2 as_short2(uint v) { union _type_cast_4_b u; u._uint = v; return u._short2; } INLINE OVERLOADABLE ushort2 as_ushort2(uint v) { union _type_cast_4_b u; u._uint = v; return u._ushort2; } INLINE OVERLOADABLE char4 as_char4(uint v) { union _type_cast_4_b u; u._uint = v; return u._char4; } INLINE OVERLOADABLE uchar4 as_uchar4(uint v) { union _type_cast_4_b u; u._uint = v; return u._uchar4; } INLINE OVERLOADABLE float as_float(uint v) { union _type_cast_4_b u; u._uint = v; return u._float; } INLINE OVERLOADABLE int as_int(short2 v) { union _type_cast_4_b u; u._short2 = v; return u._int; } INLINE OVERLOADABLE uint as_uint(short2 v) { union _type_cast_4_b u; u._short2 = v; return u._uint; } INLINE OVERLOADABLE ushort2 as_ushort2(short2 v) { union _type_cast_4_b u; u._short2 = v; return u._ushort2; } INLINE OVERLOADABLE char4 as_char4(short2 v) { union _type_cast_4_b u; u._short2 = v; return u._char4; } INLINE 
OVERLOADABLE uchar4 as_uchar4(short2 v) { union _type_cast_4_b u; u._short2 = v; return u._uchar4; } INLINE OVERLOADABLE float as_float(short2 v) { union _type_cast_4_b u; u._short2 = v; return u._float; } INLINE OVERLOADABLE int as_int(ushort2 v) { union _type_cast_4_b u; u._ushort2 = v; return u._int; } INLINE OVERLOADABLE uint as_uint(ushort2 v) { union _type_cast_4_b u; u._ushort2 = v; return u._uint; } INLINE OVERLOADABLE short2 as_short2(ushort2 v) { union _type_cast_4_b u; u._ushort2 = v; return u._short2; } INLINE OVERLOADABLE char4 as_char4(ushort2 v) { union _type_cast_4_b u; u._ushort2 = v; return u._char4; } INLINE OVERLOADABLE uchar4 as_uchar4(ushort2 v) { union _type_cast_4_b u; u._ushort2 = v; return u._uchar4; } INLINE OVERLOADABLE float as_float(ushort2 v) { union _type_cast_4_b u; u._ushort2 = v; return u._float; } INLINE OVERLOADABLE int as_int(char4 v) { union _type_cast_4_b u; u._char4 = v; return u._int; } INLINE OVERLOADABLE uint as_uint(char4 v) { union _type_cast_4_b u; u._char4 = v; return u._uint; } INLINE OVERLOADABLE short2 as_short2(char4 v) { union _type_cast_4_b u; u._char4 = v; return u._short2; } INLINE OVERLOADABLE ushort2 as_ushort2(char4 v) { union _type_cast_4_b u; u._char4 = v; return u._ushort2; } INLINE OVERLOADABLE uchar4 as_uchar4(char4 v) { union _type_cast_4_b u; u._char4 = v; return u._uchar4; } INLINE OVERLOADABLE float as_float(char4 v) { union _type_cast_4_b u; u._char4 = v; return u._float; } INLINE OVERLOADABLE int as_int(uchar4 v) { union _type_cast_4_b u; u._uchar4 = v; return u._int; } INLINE OVERLOADABLE uint as_uint(uchar4 v) { union _type_cast_4_b u; u._uchar4 = v; return u._uint; } INLINE OVERLOADABLE short2 as_short2(uchar4 v) { union _type_cast_4_b u; u._uchar4 = v; return u._short2; } INLINE OVERLOADABLE ushort2 as_ushort2(uchar4 v) { union _type_cast_4_b u; u._uchar4 = v; return u._ushort2; } INLINE OVERLOADABLE char4 as_char4(uchar4 v) { union _type_cast_4_b u; u._uchar4 = v; return u._char4; } INLINE 
OVERLOADABLE float as_float(uchar4 v) { union _type_cast_4_b u; u._uchar4 = v; return u._float; } INLINE OVERLOADABLE int as_int(float v) { union _type_cast_4_b u; u._float = v; return u._int; } INLINE OVERLOADABLE uint as_uint(float v) { union _type_cast_4_b u; u._float = v; return u._uint; } INLINE OVERLOADABLE short2 as_short2(float v) { union _type_cast_4_b u; u._float = v; return u._short2; } INLINE OVERLOADABLE ushort2 as_ushort2(float v) { union _type_cast_4_b u; u._float = v; return u._ushort2; } INLINE OVERLOADABLE char4 as_char4(float v) { union _type_cast_4_b u; u._float = v; return u._char4; } INLINE OVERLOADABLE uchar4 as_uchar4(float v) { union _type_cast_4_b u; u._float = v; return u._uchar4; } union _type_cast_6_b { short3 _short3; ushort3 _ushort3; }; INLINE OVERLOADABLE ushort3 as_ushort3(short3 v) { union _type_cast_6_b u; u._short3 = v; return u._ushort3; } INLINE OVERLOADABLE short3 as_short3(ushort3 v) { union _type_cast_6_b u; u._ushort3 = v; return u._short3; } union _type_cast_8_b { long _long; ulong _ulong; int2 _int2; uint2 _uint2; short4 _short4; ushort4 _ushort4; char8 _char8; uchar8 _uchar8; double _double; float2 _float2; }; INLINE OVERLOADABLE ulong as_ulong(long v) { union _type_cast_8_b u; u._long = v; return u._ulong; } INLINE OVERLOADABLE int2 as_int2(long v) { union _type_cast_8_b u; u._long = v; return u._int2; } INLINE OVERLOADABLE uint2 as_uint2(long v) { union _type_cast_8_b u; u._long = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(long v) { union _type_cast_8_b u; u._long = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(long v) { union _type_cast_8_b u; u._long = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(long v) { union _type_cast_8_b u; u._long = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(long v) { union _type_cast_8_b u; u._long = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(long v) { union _type_cast_8_b u; u._long = v; return u._double; } 
INLINE OVERLOADABLE float2 as_float2(long v) { union _type_cast_8_b u; u._long = v; return u._float2; } INLINE OVERLOADABLE long as_long(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._long; } INLINE OVERLOADABLE int2 as_int2(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._int2; } INLINE OVERLOADABLE uint2 as_uint2(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._double; } INLINE OVERLOADABLE float2 as_float2(ulong v) { union _type_cast_8_b u; u._ulong = v; return u._float2; } INLINE OVERLOADABLE long as_long(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._long; } INLINE OVERLOADABLE ulong as_ulong(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._ulong; } INLINE OVERLOADABLE uint2 as_uint2(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._double; } INLINE OVERLOADABLE float2 as_float2(int2 v) { union _type_cast_8_b u; u._int2 = v; return u._float2; } INLINE OVERLOADABLE long 
as_long(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._long; } INLINE OVERLOADABLE ulong as_ulong(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._ulong; } INLINE OVERLOADABLE int2 as_int2(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._int2; } INLINE OVERLOADABLE short4 as_short4(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._double; } INLINE OVERLOADABLE float2 as_float2(uint2 v) { union _type_cast_8_b u; u._uint2 = v; return u._float2; } INLINE OVERLOADABLE long as_long(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._long; } INLINE OVERLOADABLE ulong as_ulong(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._ulong; } INLINE OVERLOADABLE int2 as_int2(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._int2; } INLINE OVERLOADABLE uint2 as_uint2(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._uint2; } INLINE OVERLOADABLE ushort4 as_ushort4(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._double; } INLINE OVERLOADABLE float2 as_float2(short4 v) { union _type_cast_8_b u; u._short4 = v; return u._float2; } INLINE OVERLOADABLE long as_long(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._long; } INLINE OVERLOADABLE ulong 
as_ulong(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._ulong; } INLINE OVERLOADABLE int2 as_int2(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._int2; } INLINE OVERLOADABLE uint2 as_uint2(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._short4; } INLINE OVERLOADABLE char8 as_char8(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._double; } INLINE OVERLOADABLE float2 as_float2(ushort4 v) { union _type_cast_8_b u; u._ushort4 = v; return u._float2; } INLINE OVERLOADABLE long as_long(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._long; } INLINE OVERLOADABLE ulong as_ulong(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._ulong; } INLINE OVERLOADABLE int2 as_int2(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._int2; } INLINE OVERLOADABLE uint2 as_uint2(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._ushort4; } INLINE OVERLOADABLE uchar8 as_uchar8(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._double; } INLINE OVERLOADABLE float2 as_float2(char8 v) { union _type_cast_8_b u; u._char8 = v; return u._float2; } INLINE OVERLOADABLE long as_long(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._long; } INLINE OVERLOADABLE ulong as_ulong(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._ulong; } INLINE 
OVERLOADABLE int2 as_int2(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._int2; } INLINE OVERLOADABLE uint2 as_uint2(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._char8; } INLINE OVERLOADABLE double as_double(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._double; } INLINE OVERLOADABLE float2 as_float2(uchar8 v) { union _type_cast_8_b u; u._uchar8 = v; return u._float2; } INLINE OVERLOADABLE long as_long(double v) { union _type_cast_8_b u; u._double = v; return u._long; } INLINE OVERLOADABLE ulong as_ulong(double v) { union _type_cast_8_b u; u._double = v; return u._ulong; } INLINE OVERLOADABLE int2 as_int2(double v) { union _type_cast_8_b u; u._double = v; return u._int2; } INLINE OVERLOADABLE uint2 as_uint2(double v) { union _type_cast_8_b u; u._double = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(double v) { union _type_cast_8_b u; u._double = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(double v) { union _type_cast_8_b u; u._double = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(double v) { union _type_cast_8_b u; u._double = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(double v) { union _type_cast_8_b u; u._double = v; return u._uchar8; } INLINE OVERLOADABLE float2 as_float2(double v) { union _type_cast_8_b u; u._double = v; return u._float2; } INLINE OVERLOADABLE long as_long(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._long; } INLINE OVERLOADABLE ulong as_ulong(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._ulong; } INLINE OVERLOADABLE int2 as_int2(float2 v) { union _type_cast_8_b u; u._float2 = v; return 
u._int2; } INLINE OVERLOADABLE uint2 as_uint2(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._uint2; } INLINE OVERLOADABLE short4 as_short4(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._short4; } INLINE OVERLOADABLE ushort4 as_ushort4(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._ushort4; } INLINE OVERLOADABLE char8 as_char8(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._char8; } INLINE OVERLOADABLE uchar8 as_uchar8(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._uchar8; } INLINE OVERLOADABLE double as_double(float2 v) { union _type_cast_8_b u; u._float2 = v; return u._double; } union _type_cast_12_b { int3 _int3; uint3 _uint3; float3 _float3; }; INLINE OVERLOADABLE uint3 as_uint3(int3 v) { union _type_cast_12_b u; u._int3 = v; return u._uint3; } INLINE OVERLOADABLE float3 as_float3(int3 v) { union _type_cast_12_b u; u._int3 = v; return u._float3; } INLINE OVERLOADABLE int3 as_int3(uint3 v) { union _type_cast_12_b u; u._uint3 = v; return u._int3; } INLINE OVERLOADABLE float3 as_float3(uint3 v) { union _type_cast_12_b u; u._uint3 = v; return u._float3; } INLINE OVERLOADABLE int3 as_int3(float3 v) { union _type_cast_12_b u; u._float3 = v; return u._int3; } INLINE OVERLOADABLE uint3 as_uint3(float3 v) { union _type_cast_12_b u; u._float3 = v; return u._uint3; } union _type_cast_16_b { long2 _long2; ulong2 _ulong2; int4 _int4; uint4 _uint4; short8 _short8; ushort8 _ushort8; char16 _char16; uchar16 _uchar16; double2 _double2; float4 _float4; }; INLINE OVERLOADABLE ulong2 as_ulong2(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(long2 
v) { union _type_cast_16_b u; u._long2 = v; return u._ushort8; } INLINE OVERLOADABLE char16 as_char16(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(long2 v) { union _type_cast_16_b u; u._long2 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._long2; } INLINE OVERLOADABLE int4 as_int4(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._ushort8; } INLINE OVERLOADABLE char16 as_char16(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(ulong2 v) { union _type_cast_16_b u; u._ulong2 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._ulong2; } INLINE OVERLOADABLE uint4 as_uint4(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._ushort8; } 
INLINE OVERLOADABLE char16 as_char16(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(int4 v) { union _type_cast_16_b u; u._int4 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._int4; } INLINE OVERLOADABLE short8 as_short8(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._ushort8; } INLINE OVERLOADABLE char16 as_char16(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(uint4 v) { union _type_cast_16_b u; u._uint4 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._uint4; } INLINE OVERLOADABLE ushort8 as_ushort8(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._ushort8; } INLINE OVERLOADABLE char16 as_char16(short8 v) { union _type_cast_16_b u; 
u._short8 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(short8 v) { union _type_cast_16_b u; u._short8 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._short8; } INLINE OVERLOADABLE char16 as_char16(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(ushort8 v) { union _type_cast_16_b u; u._ushort8 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(char16 v) { union _type_cast_16_b u; u._char16 = v; return 
u._ushort8; } INLINE OVERLOADABLE uchar16 as_uchar16(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(char16 v) { union _type_cast_16_b u; u._char16 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._ushort8; } INLINE OVERLOADABLE char16 as_char16(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._char16; } INLINE OVERLOADABLE double2 as_double2(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._double2; } INLINE OVERLOADABLE float4 as_float4(uchar16 v) { union _type_cast_16_b u; u._uchar16 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._ushort8; } 
INLINE OVERLOADABLE char16 as_char16(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._uchar16; } INLINE OVERLOADABLE float4 as_float4(double2 v) { union _type_cast_16_b u; u._double2 = v; return u._float4; } INLINE OVERLOADABLE long2 as_long2(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._long2; } INLINE OVERLOADABLE ulong2 as_ulong2(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._ulong2; } INLINE OVERLOADABLE int4 as_int4(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._int4; } INLINE OVERLOADABLE uint4 as_uint4(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._uint4; } INLINE OVERLOADABLE short8 as_short8(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._short8; } INLINE OVERLOADABLE ushort8 as_ushort8(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._ushort8; } INLINE OVERLOADABLE char16 as_char16(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._char16; } INLINE OVERLOADABLE uchar16 as_uchar16(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._uchar16; } INLINE OVERLOADABLE double2 as_double2(float4 v) { union _type_cast_16_b u; u._float4 = v; return u._double2; } union _type_cast_24_b { long3 _long3; ulong3 _ulong3; double3 _double3; }; INLINE OVERLOADABLE ulong3 as_ulong3(long3 v) { union _type_cast_24_b u; u._long3 = v; return u._ulong3; } INLINE OVERLOADABLE double3 as_double3(long3 v) { union _type_cast_24_b u; u._long3 = v; return u._double3; } INLINE OVERLOADABLE long3 as_long3(ulong3 v) { union _type_cast_24_b u; u._ulong3 = v; return u._long3; } INLINE OVERLOADABLE double3 as_double3(ulong3 v) { union _type_cast_24_b u; u._ulong3 = v; return u._double3; } INLINE OVERLOADABLE long3 as_long3(double3 v) { union _type_cast_24_b u; u._double3 = v; return u._long3; } INLINE OVERLOADABLE ulong3 as_ulong3(double3 v) { union 
_type_cast_24_b u; u._double3 = v; return u._ulong3; } union _type_cast_32_b { long4 _long4; ulong4 _ulong4; int8 _int8; uint8 _uint8; short16 _short16; ushort16 _ushort16; double4 _double4; float8 _float8; }; INLINE OVERLOADABLE ulong4 as_ulong4(long4 v) { union _type_cast_32_b u; u._long4 = v; return u._ulong4; } INLINE OVERLOADABLE int8 as_int8(long4 v) { union _type_cast_32_b u; u._long4 = v; return u._int8; } INLINE OVERLOADABLE uint8 as_uint8(long4 v) { union _type_cast_32_b u; u._long4 = v; return u._uint8; } INLINE OVERLOADABLE short16 as_short16(long4 v) { union _type_cast_32_b u; u._long4 = v; return u._short16; } INLINE OVERLOADABLE ushort16 as_ushort16(long4 v) { union _type_cast_32_b u; u._long4 = v; return u._ushort16; } INLINE OVERLOADABLE double4 as_double4(long4 v) { union _type_cast_32_b u; u._long4 = v; return u._double4; } INLINE OVERLOADABLE float8 as_float8(long4 v) { union _type_cast_32_b u; u._long4 = v; return u._float8; } INLINE OVERLOADABLE long4 as_long4(ulong4 v) { union _type_cast_32_b u; u._ulong4 = v; return u._long4; } INLINE OVERLOADABLE int8 as_int8(ulong4 v) { union _type_cast_32_b u; u._ulong4 = v; return u._int8; } INLINE OVERLOADABLE uint8 as_uint8(ulong4 v) { union _type_cast_32_b u; u._ulong4 = v; return u._uint8; } INLINE OVERLOADABLE short16 as_short16(ulong4 v) { union _type_cast_32_b u; u._ulong4 = v; return u._short16; } INLINE OVERLOADABLE ushort16 as_ushort16(ulong4 v) { union _type_cast_32_b u; u._ulong4 = v; return u._ushort16; } INLINE OVERLOADABLE double4 as_double4(ulong4 v) { union _type_cast_32_b u; u._ulong4 = v; return u._double4; } INLINE OVERLOADABLE float8 as_float8(ulong4 v) { union _type_cast_32_b u; u._ulong4 = v; return u._float8; } INLINE OVERLOADABLE long4 as_long4(int8 v) { union _type_cast_32_b u; u._int8 = v; return u._long4; } INLINE OVERLOADABLE ulong4 as_ulong4(int8 v) { union _type_cast_32_b u; u._int8 = v; return u._ulong4; } INLINE OVERLOADABLE uint8 as_uint8(int8 v) { union _type_cast_32_b 
u; u._int8 = v; return u._uint8; } INLINE OVERLOADABLE short16 as_short16(int8 v) { union _type_cast_32_b u; u._int8 = v; return u._short16; } INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) { union _type_cast_32_b u; u._int8 = v; return u._ushort16; } INLINE OVERLOADABLE double4 as_double4(int8 v) { union _type_cast_32_b u; u._int8 = v; return u._double4; } INLINE OVERLOADABLE float8 as_float8(int8 v) { union _type_cast_32_b u; u._int8 = v; return u._float8; } INLINE OVERLOADABLE long4 as_long4(uint8 v) { union _type_cast_32_b u; u._uint8 = v; return u._long4; } INLINE OVERLOADABLE ulong4 as_ulong4(uint8 v) { union _type_cast_32_b u; u._uint8 = v; return u._ulong4; } INLINE OVERLOADABLE int8 as_int8(uint8 v) { union _type_cast_32_b u; u._uint8 = v; return u._int8; } INLINE OVERLOADABLE short16 as_short16(uint8 v) { union _type_cast_32_b u; u._uint8 = v; return u._short16; } INLINE OVERLOADABLE ushort16 as_ushort16(uint8 v) { union _type_cast_32_b u; u._uint8 = v; return u._ushort16; } INLINE OVERLOADABLE double4 as_double4(uint8 v) { union _type_cast_32_b u; u._uint8 = v; return u._double4; } INLINE OVERLOADABLE float8 as_float8(uint8 v) { union _type_cast_32_b u; u._uint8 = v; return u._float8; } INLINE OVERLOADABLE long4 as_long4(short16 v) { union _type_cast_32_b u; u._short16 = v; return u._long4; } INLINE OVERLOADABLE ulong4 as_ulong4(short16 v) { union _type_cast_32_b u; u._short16 = v; return u._ulong4; } INLINE OVERLOADABLE int8 as_int8(short16 v) { union _type_cast_32_b u; u._short16 = v; return u._int8; } INLINE OVERLOADABLE uint8 as_uint8(short16 v) { union _type_cast_32_b u; u._short16 = v; return u._uint8; } INLINE OVERLOADABLE ushort16 as_ushort16(short16 v) { union _type_cast_32_b u; u._short16 = v; return u._ushort16; } INLINE OVERLOADABLE double4 as_double4(short16 v) { union _type_cast_32_b u; u._short16 = v; return u._double4; } INLINE OVERLOADABLE float8 as_float8(short16 v) { union _type_cast_32_b u; u._short16 = v; return u._float8; } INLINE 
OVERLOADABLE long4 as_long4(ushort16 v) { union _type_cast_32_b u; u._ushort16 = v; return u._long4; } INLINE OVERLOADABLE ulong4 as_ulong4(ushort16 v) { union _type_cast_32_b u; u._ushort16 = v; return u._ulong4; } INLINE OVERLOADABLE int8 as_int8(ushort16 v) { union _type_cast_32_b u; u._ushort16 = v; return u._int8; } INLINE OVERLOADABLE uint8 as_uint8(ushort16 v) { union _type_cast_32_b u; u._ushort16 = v; return u._uint8; } INLINE OVERLOADABLE short16 as_short16(ushort16 v) { union _type_cast_32_b u; u._ushort16 = v; return u._short16; } INLINE OVERLOADABLE double4 as_double4(ushort16 v) { union _type_cast_32_b u; u._ushort16 = v; return u._double4; } INLINE OVERLOADABLE float8 as_float8(ushort16 v) { union _type_cast_32_b u; u._ushort16 = v; return u._float8; } INLINE OVERLOADABLE long4 as_long4(double4 v) { union _type_cast_32_b u; u._double4 = v; return u._long4; } INLINE OVERLOADABLE ulong4 as_ulong4(double4 v) { union _type_cast_32_b u; u._double4 = v; return u._ulong4; } INLINE OVERLOADABLE int8 as_int8(double4 v) { union _type_cast_32_b u; u._double4 = v; return u._int8; } INLINE OVERLOADABLE uint8 as_uint8(double4 v) { union _type_cast_32_b u; u._double4 = v; return u._uint8; } INLINE OVERLOADABLE short16 as_short16(double4 v) { union _type_cast_32_b u; u._double4 = v; return u._short16; } INLINE OVERLOADABLE ushort16 as_ushort16(double4 v) { union _type_cast_32_b u; u._double4 = v; return u._ushort16; } INLINE OVERLOADABLE float8 as_float8(double4 v) { union _type_cast_32_b u; u._double4 = v; return u._float8; } INLINE OVERLOADABLE long4 as_long4(float8 v) { union _type_cast_32_b u; u._float8 = v; return u._long4; } INLINE OVERLOADABLE ulong4 as_ulong4(float8 v) { union _type_cast_32_b u; u._float8 = v; return u._ulong4; } INLINE OVERLOADABLE int8 as_int8(float8 v) { union _type_cast_32_b u; u._float8 = v; return u._int8; } INLINE OVERLOADABLE uint8 as_uint8(float8 v) { union _type_cast_32_b u; u._float8 = v; return u._uint8; } INLINE OVERLOADABLE 
short16 as_short16(float8 v) { union _type_cast_32_b u; u._float8 = v; return u._short16; } INLINE OVERLOADABLE ushort16 as_ushort16(float8 v) { union _type_cast_32_b u; u._float8 = v; return u._ushort16; } INLINE OVERLOADABLE double4 as_double4(float8 v) { union _type_cast_32_b u; u._float8 = v; return u._double4; } union _type_cast_64_b { long8 _long8; ulong8 _ulong8; int16 _int16; uint16 _uint16; double8 _double8; float16 _float16; }; INLINE OVERLOADABLE ulong8 as_ulong8(long8 v) { union _type_cast_64_b u; u._long8 = v; return u._ulong8; } INLINE OVERLOADABLE int16 as_int16(long8 v) { union _type_cast_64_b u; u._long8 = v; return u._int16; } INLINE OVERLOADABLE uint16 as_uint16(long8 v) { union _type_cast_64_b u; u._long8 = v; return u._uint16; } INLINE OVERLOADABLE double8 as_double8(long8 v) { union _type_cast_64_b u; u._long8 = v; return u._double8; } INLINE OVERLOADABLE float16 as_float16(long8 v) { union _type_cast_64_b u; u._long8 = v; return u._float16; } INLINE OVERLOADABLE long8 as_long8(ulong8 v) { union _type_cast_64_b u; u._ulong8 = v; return u._long8; } INLINE OVERLOADABLE int16 as_int16(ulong8 v) { union _type_cast_64_b u; u._ulong8 = v; return u._int16; } INLINE OVERLOADABLE uint16 as_uint16(ulong8 v) { union _type_cast_64_b u; u._ulong8 = v; return u._uint16; } INLINE OVERLOADABLE double8 as_double8(ulong8 v) { union _type_cast_64_b u; u._ulong8 = v; return u._double8; } INLINE OVERLOADABLE float16 as_float16(ulong8 v) { union _type_cast_64_b u; u._ulong8 = v; return u._float16; } INLINE OVERLOADABLE long8 as_long8(int16 v) { union _type_cast_64_b u; u._int16 = v; return u._long8; } INLINE OVERLOADABLE ulong8 as_ulong8(int16 v) { union _type_cast_64_b u; u._int16 = v; return u._ulong8; } INLINE OVERLOADABLE uint16 as_uint16(int16 v) { union _type_cast_64_b u; u._int16 = v; return u._uint16; } INLINE OVERLOADABLE double8 as_double8(int16 v) { union _type_cast_64_b u; u._int16 = v; return u._double8; } INLINE OVERLOADABLE float16 as_float16(int16 
v) { union _type_cast_64_b u; u._int16 = v; return u._float16; } INLINE OVERLOADABLE long8 as_long8(uint16 v) { union _type_cast_64_b u; u._uint16 = v; return u._long8; } INLINE OVERLOADABLE ulong8 as_ulong8(uint16 v) { union _type_cast_64_b u; u._uint16 = v; return u._ulong8; } INLINE OVERLOADABLE int16 as_int16(uint16 v) { union _type_cast_64_b u; u._uint16 = v; return u._int16; } INLINE OVERLOADABLE double8 as_double8(uint16 v) { union _type_cast_64_b u; u._uint16 = v; return u._double8; } INLINE OVERLOADABLE float16 as_float16(uint16 v) { union _type_cast_64_b u; u._uint16 = v; return u._float16; } INLINE OVERLOADABLE long8 as_long8(double8 v) { union _type_cast_64_b u; u._double8 = v; return u._long8; } INLINE OVERLOADABLE ulong8 as_ulong8(double8 v) { union _type_cast_64_b u; u._double8 = v; return u._ulong8; } INLINE OVERLOADABLE int16 as_int16(double8 v) { union _type_cast_64_b u; u._double8 = v; return u._int16; } INLINE OVERLOADABLE uint16 as_uint16(double8 v) { union _type_cast_64_b u; u._double8 = v; return u._uint16; } INLINE OVERLOADABLE float16 as_float16(double8 v) { union _type_cast_64_b u; u._double8 = v; return u._float16; } INLINE OVERLOADABLE long8 as_long8(float16 v) { union _type_cast_64_b u; u._float16 = v; return u._long8; } INLINE OVERLOADABLE ulong8 as_ulong8(float16 v) { union _type_cast_64_b u; u._float16 = v; return u._ulong8; } INLINE OVERLOADABLE int16 as_int16(float16 v) { union _type_cast_64_b u; u._float16 = v; return u._int16; } INLINE OVERLOADABLE uint16 as_uint16(float16 v) { union _type_cast_64_b u; u._float16 = v; return u._uint16; } INLINE OVERLOADABLE double8 as_double8(float16 v) { union _type_cast_64_b u; u._float16 = v; return u._double8; } union _type_cast_128_b { long16 _long16; ulong16 _ulong16; double16 _double16; }; INLINE OVERLOADABLE ulong16 as_ulong16(long16 v) { union _type_cast_128_b u; u._long16 = v; return u._ulong16; } INLINE OVERLOADABLE double16 as_double16(long16 v) { union _type_cast_128_b u; u._long16 = 
v; return u._double16; } INLINE OVERLOADABLE long16 as_long16(ulong16 v) { union _type_cast_128_b u; u._ulong16 = v; return u._long16; } INLINE OVERLOADABLE double16 as_double16(ulong16 v) { union _type_cast_128_b u; u._ulong16 = v; return u._double16; } INLINE OVERLOADABLE long16 as_long16(double16 v) { union _type_cast_128_b u; u._double16 = v; return u._long16; } INLINE OVERLOADABLE ulong16 as_ulong16(double16 v) { union _type_cast_128_b u; u._double16 = v; return u._ulong16; } Release_v0.3/backend/src/ocl_common_defines.h000066400000000000000000000117151223142177000213610ustar00rootroot00000000000000// This file includes defines that are common to both kernel code and // the NVPTX back-end. // // Common defines for Image intrinsics // Channel order #define CLK_HAS_ALPHA(color) (color == CLK_A || color == CLK_RA || color == CLK_RGBA || color == CLK_BGRA || color == CLK_ARGB) enum { CLK_R = 0x10B0, CLK_A = 0x10B1, CLK_RG = 0x10B2, CLK_RA = 0x10B3, CLK_RGB = 0x10B4, CLK_RGBA = 0x10B5, CLK_BGRA = 0x10B6, CLK_ARGB = 0x10B7, #if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0) CLK_xRGB = 0x10B7, #endif CLK_INTENSITY = 0x10B8, CLK_LUMINANCE = 0x10B9 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) , CLK_Rx = 0x10BA, CLK_RGx = 0x10BB, CLK_RGBx = 0x10BC #endif }; typedef enum clk_channel_type { // valid formats for float return types CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8 CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16 CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half CLK_FLOAT = 0x10DE, // four channel RGBA float #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) CLK_UNORM_SHORT_565 = 0x10D4, CLK_UNORM_SHORT_555 = 0x10D5, CLK_UNORM_INT_101010 = 0x10D6, #endif // valid only for integer return types CLK_SIGNED_INT8 = 0x10D7, CLK_SIGNED_INT16 = 0x10D8, CLK_SIGNED_INT32 = 0x10D9, CLK_UNSIGNED_INT8 = 0x10DA, CLK_UNSIGNED_INT16 = 0x10DB, 
CLK_UNSIGNED_INT32 = 0x10DC, // CI SPI for CPU __CLK_UNORM_INT8888 , // four channel ARGB unorm8 __CLK_UNORM_INT8888R, // four channel BGRA unorm8 __CLK_VALID_IMAGE_TYPE_COUNT, __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT, __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to // represent any image type __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1 }clk_channel_type; typedef enum clk_sampler_type { __CLK_ADDRESS_BASE = 0, CLK_ADDRESS_NONE = (0 << __CLK_ADDRESS_BASE), CLK_ADDRESS_CLAMP = (1 << __CLK_ADDRESS_BASE), CLK_ADDRESS_CLAMP_TO_EDGE = (2 << __CLK_ADDRESS_BASE), CLK_ADDRESS_REPEAT = (3 << __CLK_ADDRESS_BASE), CLK_ADDRESS_MIRROR = (4 << __CLK_ADDRESS_BASE), #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, #endif __CLK_ADDRESS_MASK = (CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR), __CLK_ADDRESS_BITS = 3, // number of bits required to // represent address info __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, CLK_NORMALIZED_COORDS_FALSE = 0, CLK_NORMALIZED_COORDS_TRUE = (1 << __CLK_NORMALIZED_BASE), __CLK_NORMALIZED_MASK = (CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE), __CLK_NORMALIZED_BITS = 1, // number of bits required to // represent normalization __CLK_FILTER_BASE = (__CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS), CLK_FILTER_NEAREST = (0 << __CLK_FILTER_BASE), CLK_FILTER_LINEAR = (1 << __CLK_FILTER_BASE), CLK_FILTER_ANISOTROPIC = (2 << __CLK_FILTER_BASE), __CLK_FILTER_MASK = (CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC), __CLK_FILTER_BITS = 2, // number of bits required to // represent address info __CLK_MIP_BASE = (__CLK_FILTER_BASE + __CLK_FILTER_BITS), CLK_MIP_NEAREST = (0 << __CLK_MIP_BASE), CLK_MIP_LINEAR = (1 << __CLK_MIP_BASE), CLK_MIP_ANISOTROPIC = (2 << __CLK_MIP_BASE), __CLK_MIP_MASK = (CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC), __CLK_MIP_BITS 
= 2, __CLK_SAMPLER_BITS = (__CLK_MIP_BASE + __CLK_MIP_BITS), __CLK_SAMPLER_MASK = (__CLK_MIP_MASK | __CLK_FILTER_MASK | __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK), __CLK_SAMPLER_ARG_BASE = (__CLK_MIP_BASE + __CLK_SAMPLER_BITS), __CLK_SAMPLER_ARG_BITS = 8, __CLK_SAMPLER_ARG_MASK = (((1 << __CLK_SAMPLER_ARG_BITS) - 1) << __CLK_SAMPLER_ARG_BASE), __CLK_SAMPLER_ARG_KEY_BIT = (1 << (__CLK_SAMPLER_ARG_BASE + __CLK_SAMPLER_ARG_BITS)), __CLK_SAMPLER_ARG_KEY_BITS = 1, } clk_sampler_type; // Memory synchronization #define CLK_LOCAL_MEM_FENCE (1 << 0) #define CLK_GLOBAL_MEM_FENCE (1 << 1) Release_v0.3/backend/src/ocl_convert.h000066400000000000000000005304071223142177000200600ustar00rootroot00000000000000// This file is autogenerated by gen_convert.sh. // Don't modify it manually. INLINE OVERLOADABLE ulong convert_ulong(long v) { return (ulong)v; } INLINE OVERLOADABLE int convert_int(long v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(long v) { return (uint)v; } INLINE OVERLOADABLE short convert_short(long v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(long v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(long v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(long v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(long v) { return (double)v; } INLINE OVERLOADABLE float convert_float(long v) { return (float)v; } INLINE OVERLOADABLE long convert_long(ulong v) { return (long)v; } INLINE OVERLOADABLE int convert_int(ulong v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(ulong v) { return (uint)v; } INLINE OVERLOADABLE short convert_short(ulong v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(ulong v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(ulong v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(ulong v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(ulong v) { return (double)v; } INLINE OVERLOADABLE float convert_float(ulong v) { 
return (float)v; } INLINE OVERLOADABLE long convert_long(int v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(int v) { return (ulong)v; } INLINE OVERLOADABLE uint convert_uint(int v) { return (uint)v; } INLINE OVERLOADABLE short convert_short(int v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(int v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(int v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(int v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(int v) { return (double)v; } INLINE OVERLOADABLE float convert_float(int v) { return (float)v; } INLINE OVERLOADABLE long convert_long(uint v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(uint v) { return (ulong)v; } INLINE OVERLOADABLE int convert_int(uint v) { return (int)v; } INLINE OVERLOADABLE short convert_short(uint v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(uint v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(uint v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(uint v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(uint v) { return (double)v; } INLINE OVERLOADABLE float convert_float(uint v) { return (float)v; } INLINE OVERLOADABLE long convert_long(short v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(short v) { return (ulong)v; } INLINE OVERLOADABLE int convert_int(short v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(short v) { return (uint)v; } INLINE OVERLOADABLE ushort convert_ushort(short v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(short v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(short v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(short v) { return (double)v; } INLINE OVERLOADABLE float convert_float(short v) { return (float)v; } INLINE OVERLOADABLE long convert_long(ushort v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(ushort v) { return 
(ulong)v; } INLINE OVERLOADABLE int convert_int(ushort v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(ushort v) { return (uint)v; } INLINE OVERLOADABLE short convert_short(ushort v) { return (short)v; } INLINE OVERLOADABLE char convert_char(ushort v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(ushort v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(ushort v) { return (double)v; } INLINE OVERLOADABLE float convert_float(ushort v) { return (float)v; } INLINE OVERLOADABLE long convert_long(char v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(char v) { return (ulong)v; } INLINE OVERLOADABLE int convert_int(char v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(char v) { return (uint)v; } INLINE OVERLOADABLE short convert_short(char v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(char v) { return (ushort)v; } INLINE OVERLOADABLE uchar convert_uchar(char v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(char v) { return (double)v; } INLINE OVERLOADABLE float convert_float(char v) { return (float)v; } INLINE OVERLOADABLE long convert_long(uchar v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(uchar v) { return (ulong)v; } INLINE OVERLOADABLE int convert_int(uchar v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(uchar v) { return (uint)v; } INLINE OVERLOADABLE short convert_short(uchar v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(uchar v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(uchar v) { return (char)v; } INLINE OVERLOADABLE double convert_double(uchar v) { return (double)v; } INLINE OVERLOADABLE float convert_float(uchar v) { return (float)v; } INLINE OVERLOADABLE long convert_long(double v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(double v) { return (ulong)v; } INLINE OVERLOADABLE int convert_int(double v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(double v) { return 
(uint)v; } INLINE OVERLOADABLE short convert_short(double v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(double v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(double v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(double v) { return (uchar)v; } INLINE OVERLOADABLE float convert_float(double v) { return (float)v; } INLINE OVERLOADABLE long convert_long(float v) { return (long)v; } INLINE OVERLOADABLE ulong convert_ulong(float v) { return (ulong)v; } INLINE OVERLOADABLE int convert_int(float v) { return (int)v; } INLINE OVERLOADABLE uint convert_uint(float v) { return (uint)v; } INLINE OVERLOADABLE short convert_short(float v) { return (short)v; } INLINE OVERLOADABLE ushort convert_ushort(float v) { return (ushort)v; } INLINE OVERLOADABLE char convert_char(float v) { return (char)v; } INLINE OVERLOADABLE uchar convert_uchar(float v) { return (uchar)v; } INLINE OVERLOADABLE double convert_double(float v) { return (double)v; } INLINE OVERLOADABLE long2 convert_long2(long2 v) { return v; } INLINE OVERLOADABLE ulong2 convert_ulong2(long2 v) { return (ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(long2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(long2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(long2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(long2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(long2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(long2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(long2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(long2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 
convert_long2(ulong2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(ulong2 v) { return v; } INLINE OVERLOADABLE int2 convert_int2(ulong2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(ulong2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(ulong2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(ulong2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(ulong2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(ulong2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(ulong2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(ulong2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(int2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(int2 v) { return (ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(int2 v) { return v; } INLINE OVERLOADABLE uint2 convert_uint2(int2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(int2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(int2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(int2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(int2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(int2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(int2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(uint2 v) { return (long2)((long)(v.s0), 
(long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(uint2 v) { return (ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(uint2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(uint2 v) { return v; } INLINE OVERLOADABLE short2 convert_short2(uint2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(uint2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(uint2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(uint2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(uint2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(uint2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(short2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(short2 v) { return (ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(short2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(short2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(short2 v) { return v; } INLINE OVERLOADABLE ushort2 convert_ushort2(short2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(short2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(short2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(short2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(short2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(ushort2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 
convert_ulong2(ushort2 v) { return (ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(ushort2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(ushort2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(ushort2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(ushort2 v) { return v; } INLINE OVERLOADABLE char2 convert_char2(ushort2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(ushort2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(ushort2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(ushort2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(char2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(char2 v) { return (ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(char2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(char2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(char2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(char2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(char2 v) { return v; } INLINE OVERLOADABLE uchar2 convert_uchar2(char2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(char2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(char2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(uchar2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(uchar2 v) { return 
(ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(uchar2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(uchar2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(uchar2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(uchar2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(uchar2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(uchar2 v) { return v; } INLINE OVERLOADABLE double2 convert_double2(uchar2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(uchar2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(double2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(double2 v) { return (ulong2)((ulong)(v.s0), (ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(double2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(double2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(double2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(double2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(double2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(double2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(double2 v) { return v; } INLINE OVERLOADABLE float2 convert_float2(double2 v) { return (float2)((float)(v.s0), (float)(v.s1)); } INLINE OVERLOADABLE long2 convert_long2(float2 v) { return (long2)((long)(v.s0), (long)(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2(float2 v) { return (ulong2)((ulong)(v.s0), 
(ulong)(v.s1)); } INLINE OVERLOADABLE int2 convert_int2(float2 v) { return (int2)((int)(v.s0), (int)(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2(float2 v) { return (uint2)((uint)(v.s0), (uint)(v.s1)); } INLINE OVERLOADABLE short2 convert_short2(float2 v) { return (short2)((short)(v.s0), (short)(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) { return (ushort2)((ushort)(v.s0), (ushort)(v.s1)); } INLINE OVERLOADABLE char2 convert_char2(float2 v) { return (char2)((char)(v.s0), (char)(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) { return (uchar2)((uchar)(v.s0), (uchar)(v.s1)); } INLINE OVERLOADABLE double2 convert_double2(float2 v) { return (double2)((double)(v.s0), (double)(v.s1)); } INLINE OVERLOADABLE float2 convert_float2(float2 v) { return v; } INLINE OVERLOADABLE long3 convert_long3(long3 v) { return v; } INLINE OVERLOADABLE ulong3 convert_ulong3(long3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(long3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(long3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(long3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(long3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(long3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(long3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(long3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(long3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(ulong3 v) { return (long3)((long)(v.s0), (long)(v.s1), 
(long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(ulong3 v) { return v; } INLINE OVERLOADABLE int3 convert_int3(ulong3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(ulong3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(ulong3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(ulong3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(ulong3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(ulong3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(ulong3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(ulong3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(int3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(int3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(int3 v) { return v; } INLINE OVERLOADABLE uint3 convert_uint3(int3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(int3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(int3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(int3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(int3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(int3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } 
INLINE OVERLOADABLE float3 convert_float3(int3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(uint3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(uint3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(uint3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(uint3 v) { return v; } INLINE OVERLOADABLE short3 convert_short3(uint3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(uint3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(uint3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(uint3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(uint3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(uint3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(short3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(short3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(short3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(short3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(short3 v) { return v; } INLINE OVERLOADABLE ushort3 convert_ushort3(short3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(short3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 
convert_uchar3(short3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(short3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(short3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(ushort3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(ushort3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(ushort3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(ushort3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(ushort3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(ushort3 v) { return v; } INLINE OVERLOADABLE char3 convert_char3(ushort3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(ushort3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(ushort3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(ushort3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(char3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(char3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(char3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(char3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(char3 v) { return (short3)((short)(v.s0), (short)(v.s1), 
(short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(char3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(char3 v) { return v; } INLINE OVERLOADABLE uchar3 convert_uchar3(char3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(char3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(char3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(uchar3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(uchar3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(uchar3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(uchar3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(uchar3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(uchar3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(uchar3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(uchar3 v) { return v; } INLINE OVERLOADABLE double3 convert_double3(uchar3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(uchar3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(double3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(double3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(double3 v) { return (int3)((int)(v.s0), (int)(v.s1), 
(int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(double3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(double3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(double3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(double3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(double3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(double3 v) { return v; } INLINE OVERLOADABLE float3 convert_float3(double3 v) { return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2)); } INLINE OVERLOADABLE long3 convert_long3(float3 v) { return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3(float3 v) { return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2)); } INLINE OVERLOADABLE int3 convert_int3(float3 v) { return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3(float3 v) { return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2)); } INLINE OVERLOADABLE short3 convert_short3(float3 v) { return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3(float3 v) { return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2)); } INLINE OVERLOADABLE char3 convert_char3(float3 v) { return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3(float3 v) { return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2)); } INLINE OVERLOADABLE double3 convert_double3(float3 v) { return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2)); } INLINE OVERLOADABLE float3 convert_float3(float3 v) { return v; } INLINE OVERLOADABLE long4 convert_long4(long4 v) { return v; } INLINE OVERLOADABLE ulong4 
convert_ulong4(long4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(long4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(long4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(long4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(long4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(long4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE OVERLOADABLE uchar4 convert_uchar4(long4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(long4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE OVERLOADABLE float4 convert_float4(long4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(ulong4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(ulong4 v) { return v; } INLINE OVERLOADABLE int4 convert_int4(ulong4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(ulong4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(ulong4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(ulong4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(ulong4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE 
OVERLOADABLE uchar4 convert_uchar4(ulong4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(ulong4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE OVERLOADABLE float4 convert_float4(ulong4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(int4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(int4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(int4 v) { return v; } INLINE OVERLOADABLE uint4 convert_uint4(int4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(int4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(int4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(int4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE OVERLOADABLE uchar4 convert_uchar4(int4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(int4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE OVERLOADABLE float4 convert_float4(int4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(uint4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(uint4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(uint4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), 
(int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(uint4 v) { return v; } INLINE OVERLOADABLE short4 convert_short4(uint4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(uint4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(uint4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE OVERLOADABLE uchar4 convert_uchar4(uint4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(uint4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE OVERLOADABLE float4 convert_float4(uint4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(short4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(short4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(short4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(short4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(short4 v) { return v; } INLINE OVERLOADABLE ushort4 convert_ushort4(short4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(short4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE OVERLOADABLE uchar4 convert_uchar4(short4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(short4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE 
OVERLOADABLE float4 convert_float4(short4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(ushort4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(ushort4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(ushort4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(ushort4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(ushort4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(ushort4 v) { return v; } INLINE OVERLOADABLE char4 convert_char4(ushort4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE OVERLOADABLE uchar4 convert_uchar4(ushort4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(ushort4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE OVERLOADABLE float4 convert_float4(ushort4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(char4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(char4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(char4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(char4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(char4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), 
(short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(char4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(char4 v) { return v; } INLINE OVERLOADABLE uchar4 convert_uchar4(char4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(char4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE OVERLOADABLE float4 convert_float4(char4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(uchar4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(uchar4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(uchar4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(uchar4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(uchar4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(uchar4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(uchar4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE OVERLOADABLE uchar4 convert_uchar4(uchar4 v) { return v; } INLINE OVERLOADABLE double4 convert_double4(uchar4 v) { return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3)); } INLINE OVERLOADABLE float4 convert_float4(uchar4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(double4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE 
OVERLOADABLE ulong4 convert_ulong4(double4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(double4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(double4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(double4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(double4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(double4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3)); } INLINE OVERLOADABLE uchar4 convert_uchar4(double4 v) { return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3)); } INLINE OVERLOADABLE double4 convert_double4(double4 v) { return v; } INLINE OVERLOADABLE float4 convert_float4(double4 v) { return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3)); } INLINE OVERLOADABLE long4 convert_long4(float4 v) { return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3)); } INLINE OVERLOADABLE ulong4 convert_ulong4(float4 v) { return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3)); } INLINE OVERLOADABLE int4 convert_int4(float4 v) { return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3)); } INLINE OVERLOADABLE uint4 convert_uint4(float4 v) { return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3)); } INLINE OVERLOADABLE short4 convert_short4(float4 v) { return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3)); } INLINE OVERLOADABLE ushort4 convert_ushort4(float4 v) { return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3)); } INLINE OVERLOADABLE char4 convert_char4(float4 v) { return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), 
(char)(v.s3)); }
/* The line above is the tail of a definition that opens before this section;
 * it is kept exactly as it was. */

/* Remaining 4-wide conversions that take a float4 source. */
INLINE OVERLOADABLE uchar4 convert_uchar4(float4 v) {
  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
}
INLINE OVERLOADABLE double4 convert_double4(float4 v) {
  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
}
INLINE OVERLOADABLE float4 convert_float4(float4 v) { return v; }

/*
 * Local generators for the 8- and 16-wide convert_<type><N> overloads.
 *
 * OCL_CVT_ID(TYPE, N)  - same-type overload: hands the argument back as is.
 * OCL_CVT8(DST, SRC)   - builds a DST8 by casting each of the eight
 *                        components (s0..s7) of a SRC8 to DST.
 * OCL_CVT16(DST, SRC)  - same for sixteen components (s0..s9, sA..sF).
 *
 * Every expansion is one INLINE OVERLOADABLE function definition, so the
 * invocations below take no trailing semicolon. The three helpers are
 * removed again with #undef once the table is complete.
 */
#define OCL_CVT_ID(TYPE, N) \
  INLINE OVERLOADABLE TYPE##N convert_##TYPE##N(TYPE##N v) { return v; }

#define OCL_CVT8(DST, SRC) \
  INLINE OVERLOADABLE DST##8 convert_##DST##8(SRC##8 v) { \
    return (DST##8)((DST)(v.s0), (DST)(v.s1), (DST)(v.s2), (DST)(v.s3), \
                    (DST)(v.s4), (DST)(v.s5), (DST)(v.s6), (DST)(v.s7)); \
  }

#define OCL_CVT16(DST, SRC) \
  INLINE OVERLOADABLE DST##16 convert_##DST##16(SRC##16 v) { \
    return (DST##16)((DST)(v.s0), (DST)(v.s1), (DST)(v.s2), (DST)(v.s3), \
                     (DST)(v.s4), (DST)(v.s5), (DST)(v.s6), (DST)(v.s7), \
                     (DST)(v.s8), (DST)(v.s9), (DST)(v.sA), (DST)(v.sB), \
                     (DST)(v.sC), (DST)(v.sD), (DST)(v.sE), (DST)(v.sF)); \
  }

/* ---- 8-wide: source long8 ---- */
OCL_CVT_ID(long, 8)
OCL_CVT8(ulong, long)
OCL_CVT8(int, long)
OCL_CVT8(uint, long)
OCL_CVT8(short, long)
OCL_CVT8(ushort, long)
OCL_CVT8(char, long)
OCL_CVT8(uchar, long)
OCL_CVT8(double, long)
OCL_CVT8(float, long)

/* ---- 8-wide: source ulong8 ---- */
OCL_CVT8(long, ulong)
OCL_CVT_ID(ulong, 8)
OCL_CVT8(int, ulong)
OCL_CVT8(uint, ulong)
OCL_CVT8(short, ulong)
OCL_CVT8(ushort, ulong)
OCL_CVT8(char, ulong)
OCL_CVT8(uchar, ulong)
OCL_CVT8(double, ulong)
OCL_CVT8(float, ulong)

/* ---- 8-wide: source int8 ---- */
OCL_CVT8(long, int)
OCL_CVT8(ulong, int)
OCL_CVT_ID(int, 8)
OCL_CVT8(uint, int)
OCL_CVT8(short, int)
OCL_CVT8(ushort, int)
OCL_CVT8(char, int)
OCL_CVT8(uchar, int)
OCL_CVT8(double, int)
OCL_CVT8(float, int)

/* ---- 8-wide: source uint8 ---- */
OCL_CVT8(long, uint)
OCL_CVT8(ulong, uint)
OCL_CVT8(int, uint)
OCL_CVT_ID(uint, 8)
OCL_CVT8(short, uint)
OCL_CVT8(ushort, uint)
OCL_CVT8(char, uint)
OCL_CVT8(uchar, uint)
OCL_CVT8(double, uint)
OCL_CVT8(float, uint)

/* ---- 8-wide: source short8 ---- */
OCL_CVT8(long, short)
OCL_CVT8(ulong, short)
OCL_CVT8(int, short)
OCL_CVT8(uint, short)
OCL_CVT_ID(short, 8)
OCL_CVT8(ushort, short)
OCL_CVT8(char, short)
OCL_CVT8(uchar, short)
OCL_CVT8(double, short)
OCL_CVT8(float, short)

/* ---- 8-wide: source ushort8 ---- */
OCL_CVT8(long, ushort)
OCL_CVT8(ulong, ushort)
OCL_CVT8(int, ushort)
OCL_CVT8(uint, ushort)
OCL_CVT8(short, ushort)
OCL_CVT_ID(ushort, 8)
OCL_CVT8(char, ushort)
OCL_CVT8(uchar, ushort)
OCL_CVT8(double, ushort)
OCL_CVT8(float, ushort)

/* ---- 8-wide: source char8 ---- */
OCL_CVT8(long, char)
OCL_CVT8(ulong, char)
OCL_CVT8(int, char)
OCL_CVT8(uint, char)
OCL_CVT8(short, char)
OCL_CVT8(ushort, char)
OCL_CVT_ID(char, 8)
OCL_CVT8(uchar, char)
OCL_CVT8(double, char)
OCL_CVT8(float, char)

/* ---- 8-wide: source uchar8 ---- */
OCL_CVT8(long, uchar)
OCL_CVT8(ulong, uchar)
OCL_CVT8(int, uchar)
OCL_CVT8(uint, uchar)
OCL_CVT8(short, uchar)
OCL_CVT8(ushort, uchar)
OCL_CVT8(char, uchar)
OCL_CVT_ID(uchar, 8)
OCL_CVT8(double, uchar)
OCL_CVT8(float, uchar)

/* ---- 8-wide: source double8 ---- */
OCL_CVT8(long, double)
OCL_CVT8(ulong, double)
OCL_CVT8(int, double)
OCL_CVT8(uint, double)
OCL_CVT8(short, double)
OCL_CVT8(ushort, double)
OCL_CVT8(char, double)
OCL_CVT8(uchar, double)
OCL_CVT_ID(double, 8)
OCL_CVT8(float, double)

/* ---- 8-wide: source float8 ---- */
OCL_CVT8(long, float)
OCL_CVT8(ulong, float)
OCL_CVT8(int, float)
OCL_CVT8(uint, float)
OCL_CVT8(short, float)
OCL_CVT8(ushort, float)
OCL_CVT8(char, float)
OCL_CVT8(uchar, float)
OCL_CVT8(double, float)
OCL_CVT_ID(float, 8)

/* ---- 16-wide: source long16 ---- */
OCL_CVT_ID(long, 16)
OCL_CVT16(ulong, long)
OCL_CVT16(int, long)
OCL_CVT16(uint, long)
OCL_CVT16(short, long)
OCL_CVT16(ushort, long)
OCL_CVT16(char, long)
OCL_CVT16(uchar, long)
OCL_CVT16(double, long)
OCL_CVT16(float, long)

/* ---- 16-wide: source ulong16 ---- */
OCL_CVT16(long, ulong)
OCL_CVT_ID(ulong, 16)
OCL_CVT16(int, ulong)
OCL_CVT16(uint, ulong)
OCL_CVT16(short, ulong)
OCL_CVT16(ushort, ulong)
OCL_CVT16(char, ulong)
OCL_CVT16(uchar, ulong)
OCL_CVT16(double, ulong)
OCL_CVT16(float, ulong)

/* ---- 16-wide: source int16 ---- */
OCL_CVT16(long, int)
OCL_CVT16(ulong, int)
OCL_CVT_ID(int, 16)
OCL_CVT16(uint, int)
OCL_CVT16(short, int)
OCL_CVT16(ushort, int)
OCL_CVT16(char, int)
OCL_CVT16(uchar, int)
OCL_CVT16(double, int)
OCL_CVT16(float, int)

/* ---- 16-wide: source uint16 ---- */
OCL_CVT16(long, uint)
OCL_CVT16(ulong, uint)
OCL_CVT16(int, uint)
OCL_CVT_ID(uint, 16)
OCL_CVT16(short, uint)
OCL_CVT16(ushort, uint)
OCL_CVT16(char, uint)
OCL_CVT16(uchar, uint)
OCL_CVT16(double, uint)
OCL_CVT16(float, uint)

/* ---- 16-wide: source short16 ---- */
OCL_CVT16(long, short)
OCL_CVT16(ulong, short)
OCL_CVT16(int, short)
OCL_CVT16(uint, short)
OCL_CVT_ID(short, 16)
OCL_CVT16(ushort, short)
OCL_CVT16(char, short)
OCL_CVT16(uchar, short)
OCL_CVT16(double, short)
OCL_CVT16(float, short)

/* ---- 16-wide: source ushort16 ---- */
OCL_CVT16(long, ushort)
OCL_CVT16(ulong, ushort)
OCL_CVT16(int, ushort)
OCL_CVT16(uint, ushort)
OCL_CVT16(short, ushort)
OCL_CVT_ID(ushort, 16)
OCL_CVT16(char, ushort)
OCL_CVT16(uchar, ushort)
OCL_CVT16(double, ushort)
OCL_CVT16(float, ushort)

/* ---- 16-wide: source char16 ---- */
OCL_CVT16(long, char)
OCL_CVT16(ulong, char)
OCL_CVT16(int, char)
OCL_CVT16(uint, char)
OCL_CVT16(short, char)
OCL_CVT16(ushort, char)
OCL_CVT_ID(char, 16)
OCL_CVT16(uchar, char)
OCL_CVT16(double, char)
OCL_CVT16(float, char)

/* ---- 16-wide: source uchar16 ---- */
OCL_CVT16(long, uchar)
OCL_CVT16(ulong, uchar)
OCL_CVT16(int, uchar)
OCL_CVT16(uint, uchar)
OCL_CVT16(short, uchar)
OCL_CVT16(ushort, uchar)
OCL_CVT16(char, uchar)
OCL_CVT_ID(uchar, 16)
OCL_CVT16(double, uchar)
OCL_CVT16(float, uchar)

/* ---- 16-wide: source double16 (only the first three targets fall in this
 *      section; the rest of the row follows it in its original form) ---- */
OCL_CVT16(long, double)
OCL_CVT16(ulong, double)
OCL_CVT16(int, double)

#undef OCL_CVT16
#undef OCL_CVT8
#undef OCL_CVT_ID

/* The INLINE below is the first token of a definition that continues after
 * this section; it is kept exactly as it was. */
INLINE
OVERLOADABLE uint16 convert_uint16(double16 v) { return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF)); } INLINE OVERLOADABLE short16 convert_short16(double16 v) { return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16(double16 v) { return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF)); } INLINE OVERLOADABLE char16 convert_char16(double16 v) { return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16(double16 v) { return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF)); } INLINE OVERLOADABLE double16 convert_double16(double16 v) { return v; } INLINE OVERLOADABLE float16 convert_float16(double16 v) { return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF)); } INLINE OVERLOADABLE long16 
convert_long16(float16 v) { return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF)); } INLINE OVERLOADABLE ulong16 convert_ulong16(float16 v) { return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF)); } INLINE OVERLOADABLE int16 convert_int16(float16 v) { return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF)); } INLINE OVERLOADABLE uint16 convert_uint16(float16 v) { return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF)); } INLINE OVERLOADABLE short16 convert_short16(float16 v) { return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16(float16 v) { return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF)); } INLINE OVERLOADABLE char16 convert_char16(float16 v) { return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), 
(char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16(float16 v) { return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF)); } INLINE OVERLOADABLE double16 convert_double16(float16 v) { return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF)); } INLINE OVERLOADABLE float16 convert_float16(float16 v) { return v; } #define DEF(DSTTYPE, SRCTYPE) \ OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x); DEF(char, uchar); DEF(char, short); DEF(char, ushort); DEF(char, int); DEF(char, uint); DEF(char, float); DEF(uchar, char); DEF(uchar, short); DEF(uchar, ushort); DEF(uchar, int); DEF(uchar, uint); DEF(uchar, float); DEF(short, ushort); DEF(short, int); DEF(short, uint); DEF(short, float); DEF(ushort, short); DEF(ushort, int); DEF(ushort, uint); DEF(ushort, float); DEF(int, uint); DEF(int, float); DEF(uint, int); DEF(uint, float); #undef DEF #define DEF(DSTTYPE, SRCTYPE, MIN, MAX) \ INLINE_OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \ return x > MAX ? (DSTTYPE)MAX : x < MIN ? 
(DSTTYPE)MIN : x; \ } DEF(char, long, -128, 127); DEF(uchar, long, 0, 255); DEF(short, long, -32768, 32767); DEF(ushort, long, 0, 65535); DEF(int, long, -0x7fffffff-1, 0x7fffffff); DEF(uint, long, 0, 0xffffffffu); DEF(long, float, -9.223372036854776e+18f, 9.223372036854776e+18f); DEF(ulong, float, 0, 1.8446744073709552e+19f); #undef DEF #define DEF(DSTTYPE, SRCTYPE, MAX) \ INLINE_OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \ return x > MAX ? (DSTTYPE)MAX : x; \ } DEF(char, ulong, 127); DEF(uchar, ulong, 255); DEF(short, ulong, 32767); DEF(ushort, ulong, 65535); DEF(int, ulong, 0x7fffffff); DEF(uint, ulong, 0xffffffffu); #undef DEF INLINE_OVERLOADABLE long convert_long_sat(ulong x) { ulong MAX = 0x7ffffffffffffffful; return x > MAX ? MAX : x; } INLINE_OVERLOADABLE ulong convert_ulong_sat(long x) { return x < 0 ? 0 : x; } #define DEF(DSTTYPE, SRCTYPE) \ INLINE_OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \ return x; \ } DEF(char, char); DEF(uchar, uchar); DEF(short, char); DEF(short, uchar); DEF(short, short); DEF(ushort, char); DEF(ushort, uchar); DEF(ushort, ushort); DEF(int, char); DEF(int, uchar); DEF(int, short); DEF(int, ushort); DEF(int, int); DEF(uint, char); DEF(uint, uchar); DEF(uint, short); DEF(uint, ushort); DEF(uint, uint); DEF(long, char); DEF(long, uchar); DEF(long, short); DEF(long, ushort); DEF(long, int); DEF(long, uint); DEF(long, long); DEF(ulong, char); DEF(ulong, uchar); DEF(ulong, short); DEF(ulong, ushort); DEF(ulong, int); DEF(ulong, uint); DEF(ulong, ulong); #undef DEF INLINE OVERLOADABLE long2 convert_long2_sat(long2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(long2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(long2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(long2 v) { return 
(uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(long2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(long2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(long2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(long2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(ulong2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(ulong2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(ulong2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(ulong2 v) { return (uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(ulong2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(ulong2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(ulong2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(ulong2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(int2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(int2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(int2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(int2 v) { return 
(uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(int2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(int2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(int2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(int2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(uint2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(uint2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(uint2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(uint2 v) { return (uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(uint2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(uint2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(uint2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(uint2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(short2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(short2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(short2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(short2 v) { return 
(uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(short2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(short2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(short2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(short2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(ushort2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(ushort2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(ushort2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(ushort2 v) { return (uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(ushort2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(ushort2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(ushort2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(ushort2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(char2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(char2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(char2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(char2 v) { return 
(uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(char2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(char2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(char2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(char2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(uchar2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(uchar2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(uchar2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(uchar2 v) { return (uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(uchar2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(uchar2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(uchar2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(uchar2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long2 convert_long2_sat(float2 v) { return (long2)(convert_long_sat(v.s0), convert_long_sat(v.s1)); } INLINE OVERLOADABLE ulong2 convert_ulong2_sat(float2 v) { return (ulong2)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1)); } INLINE OVERLOADABLE int2 convert_int2_sat(float2 v) { return (int2)(convert_int_sat(v.s0), convert_int_sat(v.s1)); } INLINE OVERLOADABLE uint2 convert_uint2_sat(float2 v) { return 
(uint2)(convert_uint_sat(v.s0), convert_uint_sat(v.s1)); } INLINE OVERLOADABLE short2 convert_short2_sat(float2 v) { return (short2)(convert_short_sat(v.s0), convert_short_sat(v.s1)); } INLINE OVERLOADABLE ushort2 convert_ushort2_sat(float2 v) { return (ushort2)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1)); } INLINE OVERLOADABLE char2 convert_char2_sat(float2 v) { return (char2)(convert_char_sat(v.s0), convert_char_sat(v.s1)); } INLINE OVERLOADABLE uchar2 convert_uchar2_sat(float2 v) { return (uchar2)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1)); } INLINE OVERLOADABLE long3 convert_long3_sat(long3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(long3 v) { return (ulong3)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(long3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(long3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(long3 v) { return (short3)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(long3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(long3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3_sat(long3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); } INLINE OVERLOADABLE long3 convert_long3_sat(ulong3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(ulong3 v) { return (ulong3)(convert_ulong_sat(v.s0), 
convert_ulong_sat(v.s1), convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(ulong3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(ulong3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(ulong3 v) { return (short3)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(ulong3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(ulong3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3_sat(ulong3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); } INLINE OVERLOADABLE long3 convert_long3_sat(int3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(int3 v) { return (ulong3)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(int3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(int3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(int3 v) { return (short3)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(int3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(int3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 
convert_uchar3_sat(int3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); } INLINE OVERLOADABLE long3 convert_long3_sat(uint3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(uint3 v) { return (ulong3)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(uint3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(uint3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(uint3 v) { return (short3)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(uint3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(uint3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3_sat(uint3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); } INLINE OVERLOADABLE long3 convert_long3_sat(short3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(short3 v) { return (ulong3)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(short3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(short3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(short3 v) { return (short3)(convert_short_sat(v.s0), 
convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(short3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(short3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3_sat(short3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); } INLINE OVERLOADABLE long3 convert_long3_sat(ushort3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(ushort3 v) { return (ulong3)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(ushort3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(ushort3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(ushort3 v) { return (short3)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(ushort3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(ushort3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3_sat(ushort3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); } INLINE OVERLOADABLE long3 convert_long3_sat(char3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(char3 v) { return (ulong3)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), 
convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(char3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(char3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(char3 v) { return (short3)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(char3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(char3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 convert_uchar3_sat(char3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); } INLINE OVERLOADABLE long3 convert_long3_sat(uchar3 v) { return (long3)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2)); } INLINE OVERLOADABLE ulong3 convert_ulong3_sat(uchar3 v) { return (ulong3)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2)); } INLINE OVERLOADABLE int3 convert_int3_sat(uchar3 v) { return (int3)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2)); } INLINE OVERLOADABLE uint3 convert_uint3_sat(uchar3 v) { return (uint3)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2)); } INLINE OVERLOADABLE short3 convert_short3_sat(uchar3 v) { return (short3)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2)); } INLINE OVERLOADABLE ushort3 convert_ushort3_sat(uchar3 v) { return (ushort3)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2)); } INLINE OVERLOADABLE char3 convert_char3_sat(uchar3 v) { return (char3)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2)); } INLINE OVERLOADABLE uchar3 
convert_uchar3_sat(uchar3 v) { return (uchar3)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2)); }

/*
 * Vector overloads of convert_<dst>N_sat for N = 3, 4 and 8.
 *
 * Every overload builds its result lane by lane: lane i of the result is
 * convert_<dst>_sat(v.s<i>), i.e. the scalar overload of the same name is
 * applied to each component of the argument.  The scalar overloads are not
 * defined in this span; they are expected to be declared earlier in this
 * header (NOTE(review): confirm).
 *
 * The helper macros below exist only to stamp out the overloads and are
 * removed again with #undef so they do not leak into user kernels.
 *   DT  - scalar destination type name (long, ulong, int, ...)
 *   SRC - full source vector type (e.g. float3, uint8)
 *   ST  - scalar source type name; the lane count is pasted onto it
 */
#define SAT_CVT3(DT, SRC) \
  INLINE OVERLOADABLE DT##3 convert_##DT##3_sat(SRC v) { \
    return (DT##3)(convert_##DT##_sat(v.s0), convert_##DT##_sat(v.s1), \
                   convert_##DT##_sat(v.s2)); \
  }

#define SAT_CVT4(DT, SRC) \
  INLINE OVERLOADABLE DT##4 convert_##DT##4_sat(SRC v) { \
    return (DT##4)(convert_##DT##_sat(v.s0), convert_##DT##_sat(v.s1), \
                   convert_##DT##_sat(v.s2), convert_##DT##_sat(v.s3)); \
  }

#define SAT_CVT8(DT, SRC) \
  INLINE OVERLOADABLE DT##8 convert_##DT##8_sat(SRC v) { \
    return (DT##8)(convert_##DT##_sat(v.s0), convert_##DT##_sat(v.s1), \
                   convert_##DT##_sat(v.s2), convert_##DT##_sat(v.s3), \
                   convert_##DT##_sat(v.s4), convert_##DT##_sat(v.s5), \
                   convert_##DT##_sat(v.s6), convert_##DT##_sat(v.s7)); \
  }

/* All eight integer destinations for one source element type. */
#define SAT_CVT4_FROM(ST) \
  SAT_CVT4(long, ST##4)  SAT_CVT4(ulong, ST##4)  \
  SAT_CVT4(int, ST##4)   SAT_CVT4(uint, ST##4)   \
  SAT_CVT4(short, ST##4) SAT_CVT4(ushort, ST##4) \
  SAT_CVT4(char, ST##4)  SAT_CVT4(uchar, ST##4)

#define SAT_CVT8_FROM(ST) \
  SAT_CVT8(long, ST##8)  SAT_CVT8(ulong, ST##8)  \
  SAT_CVT8(int, ST##8)   SAT_CVT8(uint, ST##8)   \
  SAT_CVT8(short, ST##8) SAT_CVT8(ushort, ST##8) \
  SAT_CVT8(char, ST##8)  SAT_CVT8(uchar, ST##8)

/* 3-lane overloads taking float3. */
SAT_CVT3(long, float3)  SAT_CVT3(ulong, float3)
SAT_CVT3(int, float3)   SAT_CVT3(uint, float3)
SAT_CVT3(short, float3) SAT_CVT3(ushort, float3)
SAT_CVT3(char, float3)  SAT_CVT3(uchar, float3)

/* 4-lane overloads: every source element type. */
SAT_CVT4_FROM(long)
SAT_CVT4_FROM(ulong)
SAT_CVT4_FROM(int)
SAT_CVT4_FROM(uint)
SAT_CVT4_FROM(short)
SAT_CVT4_FROM(ushort)
SAT_CVT4_FROM(char)
SAT_CVT4_FROM(uchar)
SAT_CVT4_FROM(float)

/* 8-lane overloads: long8, ulong8 and int8 sources. */
SAT_CVT8_FROM(long)
SAT_CVT8_FROM(ulong)
SAT_CVT8_FROM(int)

/*
 * 8-lane overloads taking uint8: only the first six destinations live here;
 * the char8 and uchar8 ones follow immediately after this span.
 */
SAT_CVT8(long, uint8)  SAT_CVT8(ulong, uint8)
SAT_CVT8(int, uint8)   SAT_CVT8(uint, uint8)
SAT_CVT8(short, uint8) SAT_CVT8(ushort, uint8)

#undef SAT_CVT8_FROM
#undef SAT_CVT4_FROM
#undef SAT_CVT8
#undef SAT_CVT4
#undef SAT_CVT3
/*
 * Vector overloads of convert_<dst>N_sat for N = 8 (remaining source types)
 * and N = 16 (long16, ulong16 and the first two int16 overloads).
 *
 * Every overload builds its result lane by lane: lane i of the result is
 * convert_<dst>_sat(v.s<i>), i.e. the scalar overload of the same name is
 * applied to each component of the argument.  The scalar overloads are not
 * defined in this span; they are expected to be declared earlier in this
 * header (NOTE(review): confirm).
 *
 * The helper macros below exist only to stamp out the overloads and are
 * removed again with #undef so they do not leak into user kernels.
 *   DT  - scalar destination type name (long, ulong, int, ...)
 *   SRC - full source vector type (e.g. uint8, int16)
 *   ST  - scalar source type name; the lane count is pasted onto it
 */
#define SAT_CVT8(DT, SRC) \
  INLINE OVERLOADABLE DT##8 convert_##DT##8_sat(SRC v) { \
    return (DT##8)(convert_##DT##_sat(v.s0), convert_##DT##_sat(v.s1), \
                   convert_##DT##_sat(v.s2), convert_##DT##_sat(v.s3), \
                   convert_##DT##_sat(v.s4), convert_##DT##_sat(v.s5), \
                   convert_##DT##_sat(v.s6), convert_##DT##_sat(v.s7)); \
  }

#define SAT_CVT16(DT, SRC) \
  INLINE OVERLOADABLE DT##16 convert_##DT##16_sat(SRC v) { \
    return (DT##16)(convert_##DT##_sat(v.s0), convert_##DT##_sat(v.s1), \
                    convert_##DT##_sat(v.s2), convert_##DT##_sat(v.s3), \
                    convert_##DT##_sat(v.s4), convert_##DT##_sat(v.s5), \
                    convert_##DT##_sat(v.s6), convert_##DT##_sat(v.s7), \
                    convert_##DT##_sat(v.s8), convert_##DT##_sat(v.s9), \
                    convert_##DT##_sat(v.sA), convert_##DT##_sat(v.sB), \
                    convert_##DT##_sat(v.sC), convert_##DT##_sat(v.sD), \
                    convert_##DT##_sat(v.sE), convert_##DT##_sat(v.sF)); \
  }

/* All eight integer destinations for one source element type. */
#define SAT_CVT8_FROM(ST) \
  SAT_CVT8(long, ST##8)  SAT_CVT8(ulong, ST##8)  \
  SAT_CVT8(int, ST##8)   SAT_CVT8(uint, ST##8)   \
  SAT_CVT8(short, ST##8) SAT_CVT8(ushort, ST##8) \
  SAT_CVT8(char, ST##8)  SAT_CVT8(uchar, ST##8)

#define SAT_CVT16_FROM(ST) \
  SAT_CVT16(long, ST##16)  SAT_CVT16(ulong, ST##16)  \
  SAT_CVT16(int, ST##16)   SAT_CVT16(uint, ST##16)   \
  SAT_CVT16(short, ST##16) SAT_CVT16(ushort, ST##16) \
  SAT_CVT16(char, ST##16)  SAT_CVT16(uchar, ST##16)

/* 8-lane overloads taking uint8: the char8 and uchar8 destinations. */
SAT_CVT8(char, uint8) SAT_CVT8(uchar, uint8)

/* 8-lane overloads: short8, ushort8, char8, uchar8 and float8 sources. */
SAT_CVT8_FROM(short)
SAT_CVT8_FROM(ushort)
SAT_CVT8_FROM(char)
SAT_CVT8_FROM(uchar)
SAT_CVT8_FROM(float)

/* 16-lane overloads: long16 and ulong16 sources. */
SAT_CVT16_FROM(long)
SAT_CVT16_FROM(ulong)

/* 16-lane overloads taking int16: only long16 and ulong16 live in this span. */
SAT_CVT16(long, int16) SAT_CVT16(ulong, int16)

#undef SAT_CVT16_FROM
#undef SAT_CVT8_FROM
#undef SAT_CVT16
#undef SAT_CVT8

/* Head of the next overload; its definition continues on the following line. */
INLINE
OVERLOADABLE int16 convert_int16_sat(int16 v) { return (int16)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2), convert_int_sat(v.s3), convert_int_sat(v.s4), convert_int_sat(v.s5), convert_int_sat(v.s6), convert_int_sat(v.s7), convert_int_sat(v.s8), convert_int_sat(v.s9), convert_int_sat(v.sA), convert_int_sat(v.sB), convert_int_sat(v.sC), convert_int_sat(v.sD), convert_int_sat(v.sE), convert_int_sat(v.sF)); } INLINE OVERLOADABLE uint16 convert_uint16_sat(int16 v) { return (uint16)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2), convert_uint_sat(v.s3), convert_uint_sat(v.s4), convert_uint_sat(v.s5), convert_uint_sat(v.s6), convert_uint_sat(v.s7), convert_uint_sat(v.s8), convert_uint_sat(v.s9), convert_uint_sat(v.sA), convert_uint_sat(v.sB), convert_uint_sat(v.sC), convert_uint_sat(v.sD), convert_uint_sat(v.sE), convert_uint_sat(v.sF)); } INLINE OVERLOADABLE short16 convert_short16_sat(int16 v) { return (short16)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2), convert_short_sat(v.s3), convert_short_sat(v.s4), convert_short_sat(v.s5), convert_short_sat(v.s6), convert_short_sat(v.s7), convert_short_sat(v.s8), convert_short_sat(v.s9), convert_short_sat(v.sA), convert_short_sat(v.sB), convert_short_sat(v.sC), convert_short_sat(v.sD), convert_short_sat(v.sE), convert_short_sat(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16_sat(int16 v) { return (ushort16)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2), convert_ushort_sat(v.s3), convert_ushort_sat(v.s4), convert_ushort_sat(v.s5), convert_ushort_sat(v.s6), convert_ushort_sat(v.s7), convert_ushort_sat(v.s8), convert_ushort_sat(v.s9), convert_ushort_sat(v.sA), convert_ushort_sat(v.sB), convert_ushort_sat(v.sC), convert_ushort_sat(v.sD), convert_ushort_sat(v.sE), convert_ushort_sat(v.sF)); } INLINE OVERLOADABLE char16 convert_char16_sat(int16 v) { return (char16)(convert_char_sat(v.s0), convert_char_sat(v.s1), 
convert_char_sat(v.s2), convert_char_sat(v.s3), convert_char_sat(v.s4), convert_char_sat(v.s5), convert_char_sat(v.s6), convert_char_sat(v.s7), convert_char_sat(v.s8), convert_char_sat(v.s9), convert_char_sat(v.sA), convert_char_sat(v.sB), convert_char_sat(v.sC), convert_char_sat(v.sD), convert_char_sat(v.sE), convert_char_sat(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16_sat(int16 v) { return (uchar16)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2), convert_uchar_sat(v.s3), convert_uchar_sat(v.s4), convert_uchar_sat(v.s5), convert_uchar_sat(v.s6), convert_uchar_sat(v.s7), convert_uchar_sat(v.s8), convert_uchar_sat(v.s9), convert_uchar_sat(v.sA), convert_uchar_sat(v.sB), convert_uchar_sat(v.sC), convert_uchar_sat(v.sD), convert_uchar_sat(v.sE), convert_uchar_sat(v.sF)); } INLINE OVERLOADABLE long16 convert_long16_sat(uint16 v) { return (long16)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2), convert_long_sat(v.s3), convert_long_sat(v.s4), convert_long_sat(v.s5), convert_long_sat(v.s6), convert_long_sat(v.s7), convert_long_sat(v.s8), convert_long_sat(v.s9), convert_long_sat(v.sA), convert_long_sat(v.sB), convert_long_sat(v.sC), convert_long_sat(v.sD), convert_long_sat(v.sE), convert_long_sat(v.sF)); } INLINE OVERLOADABLE ulong16 convert_ulong16_sat(uint16 v) { return (ulong16)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2), convert_ulong_sat(v.s3), convert_ulong_sat(v.s4), convert_ulong_sat(v.s5), convert_ulong_sat(v.s6), convert_ulong_sat(v.s7), convert_ulong_sat(v.s8), convert_ulong_sat(v.s9), convert_ulong_sat(v.sA), convert_ulong_sat(v.sB), convert_ulong_sat(v.sC), convert_ulong_sat(v.sD), convert_ulong_sat(v.sE), convert_ulong_sat(v.sF)); } INLINE OVERLOADABLE int16 convert_int16_sat(uint16 v) { return (int16)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2), convert_int_sat(v.s3), convert_int_sat(v.s4), convert_int_sat(v.s5), convert_int_sat(v.s6), 
convert_int_sat(v.s7), convert_int_sat(v.s8), convert_int_sat(v.s9), convert_int_sat(v.sA), convert_int_sat(v.sB), convert_int_sat(v.sC), convert_int_sat(v.sD), convert_int_sat(v.sE), convert_int_sat(v.sF)); } INLINE OVERLOADABLE uint16 convert_uint16_sat(uint16 v) { return (uint16)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2), convert_uint_sat(v.s3), convert_uint_sat(v.s4), convert_uint_sat(v.s5), convert_uint_sat(v.s6), convert_uint_sat(v.s7), convert_uint_sat(v.s8), convert_uint_sat(v.s9), convert_uint_sat(v.sA), convert_uint_sat(v.sB), convert_uint_sat(v.sC), convert_uint_sat(v.sD), convert_uint_sat(v.sE), convert_uint_sat(v.sF)); } INLINE OVERLOADABLE short16 convert_short16_sat(uint16 v) { return (short16)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2), convert_short_sat(v.s3), convert_short_sat(v.s4), convert_short_sat(v.s5), convert_short_sat(v.s6), convert_short_sat(v.s7), convert_short_sat(v.s8), convert_short_sat(v.s9), convert_short_sat(v.sA), convert_short_sat(v.sB), convert_short_sat(v.sC), convert_short_sat(v.sD), convert_short_sat(v.sE), convert_short_sat(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16_sat(uint16 v) { return (ushort16)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2), convert_ushort_sat(v.s3), convert_ushort_sat(v.s4), convert_ushort_sat(v.s5), convert_ushort_sat(v.s6), convert_ushort_sat(v.s7), convert_ushort_sat(v.s8), convert_ushort_sat(v.s9), convert_ushort_sat(v.sA), convert_ushort_sat(v.sB), convert_ushort_sat(v.sC), convert_ushort_sat(v.sD), convert_ushort_sat(v.sE), convert_ushort_sat(v.sF)); } INLINE OVERLOADABLE char16 convert_char16_sat(uint16 v) { return (char16)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2), convert_char_sat(v.s3), convert_char_sat(v.s4), convert_char_sat(v.s5), convert_char_sat(v.s6), convert_char_sat(v.s7), convert_char_sat(v.s8), convert_char_sat(v.s9), convert_char_sat(v.sA), 
convert_char_sat(v.sB), convert_char_sat(v.sC), convert_char_sat(v.sD), convert_char_sat(v.sE), convert_char_sat(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16_sat(uint16 v) { return (uchar16)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2), convert_uchar_sat(v.s3), convert_uchar_sat(v.s4), convert_uchar_sat(v.s5), convert_uchar_sat(v.s6), convert_uchar_sat(v.s7), convert_uchar_sat(v.s8), convert_uchar_sat(v.s9), convert_uchar_sat(v.sA), convert_uchar_sat(v.sB), convert_uchar_sat(v.sC), convert_uchar_sat(v.sD), convert_uchar_sat(v.sE), convert_uchar_sat(v.sF)); } INLINE OVERLOADABLE long16 convert_long16_sat(short16 v) { return (long16)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2), convert_long_sat(v.s3), convert_long_sat(v.s4), convert_long_sat(v.s5), convert_long_sat(v.s6), convert_long_sat(v.s7), convert_long_sat(v.s8), convert_long_sat(v.s9), convert_long_sat(v.sA), convert_long_sat(v.sB), convert_long_sat(v.sC), convert_long_sat(v.sD), convert_long_sat(v.sE), convert_long_sat(v.sF)); } INLINE OVERLOADABLE ulong16 convert_ulong16_sat(short16 v) { return (ulong16)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2), convert_ulong_sat(v.s3), convert_ulong_sat(v.s4), convert_ulong_sat(v.s5), convert_ulong_sat(v.s6), convert_ulong_sat(v.s7), convert_ulong_sat(v.s8), convert_ulong_sat(v.s9), convert_ulong_sat(v.sA), convert_ulong_sat(v.sB), convert_ulong_sat(v.sC), convert_ulong_sat(v.sD), convert_ulong_sat(v.sE), convert_ulong_sat(v.sF)); } INLINE OVERLOADABLE int16 convert_int16_sat(short16 v) { return (int16)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2), convert_int_sat(v.s3), convert_int_sat(v.s4), convert_int_sat(v.s5), convert_int_sat(v.s6), convert_int_sat(v.s7), convert_int_sat(v.s8), convert_int_sat(v.s9), convert_int_sat(v.sA), convert_int_sat(v.sB), convert_int_sat(v.sC), convert_int_sat(v.sD), convert_int_sat(v.sE), convert_int_sat(v.sF)); } INLINE 
OVERLOADABLE uint16 convert_uint16_sat(short16 v) { return (uint16)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2), convert_uint_sat(v.s3), convert_uint_sat(v.s4), convert_uint_sat(v.s5), convert_uint_sat(v.s6), convert_uint_sat(v.s7), convert_uint_sat(v.s8), convert_uint_sat(v.s9), convert_uint_sat(v.sA), convert_uint_sat(v.sB), convert_uint_sat(v.sC), convert_uint_sat(v.sD), convert_uint_sat(v.sE), convert_uint_sat(v.sF)); } INLINE OVERLOADABLE short16 convert_short16_sat(short16 v) { return (short16)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2), convert_short_sat(v.s3), convert_short_sat(v.s4), convert_short_sat(v.s5), convert_short_sat(v.s6), convert_short_sat(v.s7), convert_short_sat(v.s8), convert_short_sat(v.s9), convert_short_sat(v.sA), convert_short_sat(v.sB), convert_short_sat(v.sC), convert_short_sat(v.sD), convert_short_sat(v.sE), convert_short_sat(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16_sat(short16 v) { return (ushort16)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2), convert_ushort_sat(v.s3), convert_ushort_sat(v.s4), convert_ushort_sat(v.s5), convert_ushort_sat(v.s6), convert_ushort_sat(v.s7), convert_ushort_sat(v.s8), convert_ushort_sat(v.s9), convert_ushort_sat(v.sA), convert_ushort_sat(v.sB), convert_ushort_sat(v.sC), convert_ushort_sat(v.sD), convert_ushort_sat(v.sE), convert_ushort_sat(v.sF)); } INLINE OVERLOADABLE char16 convert_char16_sat(short16 v) { return (char16)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2), convert_char_sat(v.s3), convert_char_sat(v.s4), convert_char_sat(v.s5), convert_char_sat(v.s6), convert_char_sat(v.s7), convert_char_sat(v.s8), convert_char_sat(v.s9), convert_char_sat(v.sA), convert_char_sat(v.sB), convert_char_sat(v.sC), convert_char_sat(v.sD), convert_char_sat(v.sE), convert_char_sat(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16_sat(short16 v) { return 
(uchar16)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2), convert_uchar_sat(v.s3), convert_uchar_sat(v.s4), convert_uchar_sat(v.s5), convert_uchar_sat(v.s6), convert_uchar_sat(v.s7), convert_uchar_sat(v.s8), convert_uchar_sat(v.s9), convert_uchar_sat(v.sA), convert_uchar_sat(v.sB), convert_uchar_sat(v.sC), convert_uchar_sat(v.sD), convert_uchar_sat(v.sE), convert_uchar_sat(v.sF)); } INLINE OVERLOADABLE long16 convert_long16_sat(ushort16 v) { return (long16)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2), convert_long_sat(v.s3), convert_long_sat(v.s4), convert_long_sat(v.s5), convert_long_sat(v.s6), convert_long_sat(v.s7), convert_long_sat(v.s8), convert_long_sat(v.s9), convert_long_sat(v.sA), convert_long_sat(v.sB), convert_long_sat(v.sC), convert_long_sat(v.sD), convert_long_sat(v.sE), convert_long_sat(v.sF)); } INLINE OVERLOADABLE ulong16 convert_ulong16_sat(ushort16 v) { return (ulong16)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2), convert_ulong_sat(v.s3), convert_ulong_sat(v.s4), convert_ulong_sat(v.s5), convert_ulong_sat(v.s6), convert_ulong_sat(v.s7), convert_ulong_sat(v.s8), convert_ulong_sat(v.s9), convert_ulong_sat(v.sA), convert_ulong_sat(v.sB), convert_ulong_sat(v.sC), convert_ulong_sat(v.sD), convert_ulong_sat(v.sE), convert_ulong_sat(v.sF)); } INLINE OVERLOADABLE int16 convert_int16_sat(ushort16 v) { return (int16)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2), convert_int_sat(v.s3), convert_int_sat(v.s4), convert_int_sat(v.s5), convert_int_sat(v.s6), convert_int_sat(v.s7), convert_int_sat(v.s8), convert_int_sat(v.s9), convert_int_sat(v.sA), convert_int_sat(v.sB), convert_int_sat(v.sC), convert_int_sat(v.sD), convert_int_sat(v.sE), convert_int_sat(v.sF)); } INLINE OVERLOADABLE uint16 convert_uint16_sat(ushort16 v) { return (uint16)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2), convert_uint_sat(v.s3), convert_uint_sat(v.s4), 
convert_uint_sat(v.s5), convert_uint_sat(v.s6), convert_uint_sat(v.s7), convert_uint_sat(v.s8), convert_uint_sat(v.s9), convert_uint_sat(v.sA), convert_uint_sat(v.sB), convert_uint_sat(v.sC), convert_uint_sat(v.sD), convert_uint_sat(v.sE), convert_uint_sat(v.sF)); } INLINE OVERLOADABLE short16 convert_short16_sat(ushort16 v) { return (short16)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2), convert_short_sat(v.s3), convert_short_sat(v.s4), convert_short_sat(v.s5), convert_short_sat(v.s6), convert_short_sat(v.s7), convert_short_sat(v.s8), convert_short_sat(v.s9), convert_short_sat(v.sA), convert_short_sat(v.sB), convert_short_sat(v.sC), convert_short_sat(v.sD), convert_short_sat(v.sE), convert_short_sat(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16_sat(ushort16 v) { return (ushort16)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2), convert_ushort_sat(v.s3), convert_ushort_sat(v.s4), convert_ushort_sat(v.s5), convert_ushort_sat(v.s6), convert_ushort_sat(v.s7), convert_ushort_sat(v.s8), convert_ushort_sat(v.s9), convert_ushort_sat(v.sA), convert_ushort_sat(v.sB), convert_ushort_sat(v.sC), convert_ushort_sat(v.sD), convert_ushort_sat(v.sE), convert_ushort_sat(v.sF)); } INLINE OVERLOADABLE char16 convert_char16_sat(ushort16 v) { return (char16)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2), convert_char_sat(v.s3), convert_char_sat(v.s4), convert_char_sat(v.s5), convert_char_sat(v.s6), convert_char_sat(v.s7), convert_char_sat(v.s8), convert_char_sat(v.s9), convert_char_sat(v.sA), convert_char_sat(v.sB), convert_char_sat(v.sC), convert_char_sat(v.sD), convert_char_sat(v.sE), convert_char_sat(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16_sat(ushort16 v) { return (uchar16)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2), convert_uchar_sat(v.s3), convert_uchar_sat(v.s4), convert_uchar_sat(v.s5), convert_uchar_sat(v.s6), convert_uchar_sat(v.s7), 
convert_uchar_sat(v.s8), convert_uchar_sat(v.s9), convert_uchar_sat(v.sA), convert_uchar_sat(v.sB), convert_uchar_sat(v.sC), convert_uchar_sat(v.sD), convert_uchar_sat(v.sE), convert_uchar_sat(v.sF)); } INLINE OVERLOADABLE long16 convert_long16_sat(char16 v) { return (long16)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2), convert_long_sat(v.s3), convert_long_sat(v.s4), convert_long_sat(v.s5), convert_long_sat(v.s6), convert_long_sat(v.s7), convert_long_sat(v.s8), convert_long_sat(v.s9), convert_long_sat(v.sA), convert_long_sat(v.sB), convert_long_sat(v.sC), convert_long_sat(v.sD), convert_long_sat(v.sE), convert_long_sat(v.sF)); } INLINE OVERLOADABLE ulong16 convert_ulong16_sat(char16 v) { return (ulong16)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2), convert_ulong_sat(v.s3), convert_ulong_sat(v.s4), convert_ulong_sat(v.s5), convert_ulong_sat(v.s6), convert_ulong_sat(v.s7), convert_ulong_sat(v.s8), convert_ulong_sat(v.s9), convert_ulong_sat(v.sA), convert_ulong_sat(v.sB), convert_ulong_sat(v.sC), convert_ulong_sat(v.sD), convert_ulong_sat(v.sE), convert_ulong_sat(v.sF)); } INLINE OVERLOADABLE int16 convert_int16_sat(char16 v) { return (int16)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2), convert_int_sat(v.s3), convert_int_sat(v.s4), convert_int_sat(v.s5), convert_int_sat(v.s6), convert_int_sat(v.s7), convert_int_sat(v.s8), convert_int_sat(v.s9), convert_int_sat(v.sA), convert_int_sat(v.sB), convert_int_sat(v.sC), convert_int_sat(v.sD), convert_int_sat(v.sE), convert_int_sat(v.sF)); } INLINE OVERLOADABLE uint16 convert_uint16_sat(char16 v) { return (uint16)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2), convert_uint_sat(v.s3), convert_uint_sat(v.s4), convert_uint_sat(v.s5), convert_uint_sat(v.s6), convert_uint_sat(v.s7), convert_uint_sat(v.s8), convert_uint_sat(v.s9), convert_uint_sat(v.sA), convert_uint_sat(v.sB), convert_uint_sat(v.sC), convert_uint_sat(v.sD), 
convert_uint_sat(v.sE), convert_uint_sat(v.sF)); } INLINE OVERLOADABLE short16 convert_short16_sat(char16 v) { return (short16)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2), convert_short_sat(v.s3), convert_short_sat(v.s4), convert_short_sat(v.s5), convert_short_sat(v.s6), convert_short_sat(v.s7), convert_short_sat(v.s8), convert_short_sat(v.s9), convert_short_sat(v.sA), convert_short_sat(v.sB), convert_short_sat(v.sC), convert_short_sat(v.sD), convert_short_sat(v.sE), convert_short_sat(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16_sat(char16 v) { return (ushort16)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2), convert_ushort_sat(v.s3), convert_ushort_sat(v.s4), convert_ushort_sat(v.s5), convert_ushort_sat(v.s6), convert_ushort_sat(v.s7), convert_ushort_sat(v.s8), convert_ushort_sat(v.s9), convert_ushort_sat(v.sA), convert_ushort_sat(v.sB), convert_ushort_sat(v.sC), convert_ushort_sat(v.sD), convert_ushort_sat(v.sE), convert_ushort_sat(v.sF)); } INLINE OVERLOADABLE char16 convert_char16_sat(char16 v) { return (char16)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2), convert_char_sat(v.s3), convert_char_sat(v.s4), convert_char_sat(v.s5), convert_char_sat(v.s6), convert_char_sat(v.s7), convert_char_sat(v.s8), convert_char_sat(v.s9), convert_char_sat(v.sA), convert_char_sat(v.sB), convert_char_sat(v.sC), convert_char_sat(v.sD), convert_char_sat(v.sE), convert_char_sat(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16_sat(char16 v) { return (uchar16)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2), convert_uchar_sat(v.s3), convert_uchar_sat(v.s4), convert_uchar_sat(v.s5), convert_uchar_sat(v.s6), convert_uchar_sat(v.s7), convert_uchar_sat(v.s8), convert_uchar_sat(v.s9), convert_uchar_sat(v.sA), convert_uchar_sat(v.sB), convert_uchar_sat(v.sC), convert_uchar_sat(v.sD), convert_uchar_sat(v.sE), convert_uchar_sat(v.sF)); } INLINE OVERLOADABLE long16 
convert_long16_sat(uchar16 v) { return (long16)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2), convert_long_sat(v.s3), convert_long_sat(v.s4), convert_long_sat(v.s5), convert_long_sat(v.s6), convert_long_sat(v.s7), convert_long_sat(v.s8), convert_long_sat(v.s9), convert_long_sat(v.sA), convert_long_sat(v.sB), convert_long_sat(v.sC), convert_long_sat(v.sD), convert_long_sat(v.sE), convert_long_sat(v.sF)); } INLINE OVERLOADABLE ulong16 convert_ulong16_sat(uchar16 v) { return (ulong16)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2), convert_ulong_sat(v.s3), convert_ulong_sat(v.s4), convert_ulong_sat(v.s5), convert_ulong_sat(v.s6), convert_ulong_sat(v.s7), convert_ulong_sat(v.s8), convert_ulong_sat(v.s9), convert_ulong_sat(v.sA), convert_ulong_sat(v.sB), convert_ulong_sat(v.sC), convert_ulong_sat(v.sD), convert_ulong_sat(v.sE), convert_ulong_sat(v.sF)); } INLINE OVERLOADABLE int16 convert_int16_sat(uchar16 v) { return (int16)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2), convert_int_sat(v.s3), convert_int_sat(v.s4), convert_int_sat(v.s5), convert_int_sat(v.s6), convert_int_sat(v.s7), convert_int_sat(v.s8), convert_int_sat(v.s9), convert_int_sat(v.sA), convert_int_sat(v.sB), convert_int_sat(v.sC), convert_int_sat(v.sD), convert_int_sat(v.sE), convert_int_sat(v.sF)); } INLINE OVERLOADABLE uint16 convert_uint16_sat(uchar16 v) { return (uint16)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2), convert_uint_sat(v.s3), convert_uint_sat(v.s4), convert_uint_sat(v.s5), convert_uint_sat(v.s6), convert_uint_sat(v.s7), convert_uint_sat(v.s8), convert_uint_sat(v.s9), convert_uint_sat(v.sA), convert_uint_sat(v.sB), convert_uint_sat(v.sC), convert_uint_sat(v.sD), convert_uint_sat(v.sE), convert_uint_sat(v.sF)); } INLINE OVERLOADABLE short16 convert_short16_sat(uchar16 v) { return (short16)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2), convert_short_sat(v.s3), 
convert_short_sat(v.s4), convert_short_sat(v.s5), convert_short_sat(v.s6), convert_short_sat(v.s7), convert_short_sat(v.s8), convert_short_sat(v.s9), convert_short_sat(v.sA), convert_short_sat(v.sB), convert_short_sat(v.sC), convert_short_sat(v.sD), convert_short_sat(v.sE), convert_short_sat(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16_sat(uchar16 v) { return (ushort16)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2), convert_ushort_sat(v.s3), convert_ushort_sat(v.s4), convert_ushort_sat(v.s5), convert_ushort_sat(v.s6), convert_ushort_sat(v.s7), convert_ushort_sat(v.s8), convert_ushort_sat(v.s9), convert_ushort_sat(v.sA), convert_ushort_sat(v.sB), convert_ushort_sat(v.sC), convert_ushort_sat(v.sD), convert_ushort_sat(v.sE), convert_ushort_sat(v.sF)); } INLINE OVERLOADABLE char16 convert_char16_sat(uchar16 v) { return (char16)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2), convert_char_sat(v.s3), convert_char_sat(v.s4), convert_char_sat(v.s5), convert_char_sat(v.s6), convert_char_sat(v.s7), convert_char_sat(v.s8), convert_char_sat(v.s9), convert_char_sat(v.sA), convert_char_sat(v.sB), convert_char_sat(v.sC), convert_char_sat(v.sD), convert_char_sat(v.sE), convert_char_sat(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16_sat(uchar16 v) { return (uchar16)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2), convert_uchar_sat(v.s3), convert_uchar_sat(v.s4), convert_uchar_sat(v.s5), convert_uchar_sat(v.s6), convert_uchar_sat(v.s7), convert_uchar_sat(v.s8), convert_uchar_sat(v.s9), convert_uchar_sat(v.sA), convert_uchar_sat(v.sB), convert_uchar_sat(v.sC), convert_uchar_sat(v.sD), convert_uchar_sat(v.sE), convert_uchar_sat(v.sF)); } INLINE OVERLOADABLE long16 convert_long16_sat(float16 v) { return (long16)(convert_long_sat(v.s0), convert_long_sat(v.s1), convert_long_sat(v.s2), convert_long_sat(v.s3), convert_long_sat(v.s4), convert_long_sat(v.s5), convert_long_sat(v.s6), 
convert_long_sat(v.s7), convert_long_sat(v.s8), convert_long_sat(v.s9), convert_long_sat(v.sA), convert_long_sat(v.sB), convert_long_sat(v.sC), convert_long_sat(v.sD), convert_long_sat(v.sE), convert_long_sat(v.sF)); } INLINE OVERLOADABLE ulong16 convert_ulong16_sat(float16 v) { return (ulong16)(convert_ulong_sat(v.s0), convert_ulong_sat(v.s1), convert_ulong_sat(v.s2), convert_ulong_sat(v.s3), convert_ulong_sat(v.s4), convert_ulong_sat(v.s5), convert_ulong_sat(v.s6), convert_ulong_sat(v.s7), convert_ulong_sat(v.s8), convert_ulong_sat(v.s9), convert_ulong_sat(v.sA), convert_ulong_sat(v.sB), convert_ulong_sat(v.sC), convert_ulong_sat(v.sD), convert_ulong_sat(v.sE), convert_ulong_sat(v.sF)); } INLINE OVERLOADABLE int16 convert_int16_sat(float16 v) { return (int16)(convert_int_sat(v.s0), convert_int_sat(v.s1), convert_int_sat(v.s2), convert_int_sat(v.s3), convert_int_sat(v.s4), convert_int_sat(v.s5), convert_int_sat(v.s6), convert_int_sat(v.s7), convert_int_sat(v.s8), convert_int_sat(v.s9), convert_int_sat(v.sA), convert_int_sat(v.sB), convert_int_sat(v.sC), convert_int_sat(v.sD), convert_int_sat(v.sE), convert_int_sat(v.sF)); } INLINE OVERLOADABLE uint16 convert_uint16_sat(float16 v) { return (uint16)(convert_uint_sat(v.s0), convert_uint_sat(v.s1), convert_uint_sat(v.s2), convert_uint_sat(v.s3), convert_uint_sat(v.s4), convert_uint_sat(v.s5), convert_uint_sat(v.s6), convert_uint_sat(v.s7), convert_uint_sat(v.s8), convert_uint_sat(v.s9), convert_uint_sat(v.sA), convert_uint_sat(v.sB), convert_uint_sat(v.sC), convert_uint_sat(v.sD), convert_uint_sat(v.sE), convert_uint_sat(v.sF)); } INLINE OVERLOADABLE short16 convert_short16_sat(float16 v) { return (short16)(convert_short_sat(v.s0), convert_short_sat(v.s1), convert_short_sat(v.s2), convert_short_sat(v.s3), convert_short_sat(v.s4), convert_short_sat(v.s5), convert_short_sat(v.s6), convert_short_sat(v.s7), convert_short_sat(v.s8), convert_short_sat(v.s9), convert_short_sat(v.sA), convert_short_sat(v.sB), 
convert_short_sat(v.sC), convert_short_sat(v.sD), convert_short_sat(v.sE), convert_short_sat(v.sF)); } INLINE OVERLOADABLE ushort16 convert_ushort16_sat(float16 v) { return (ushort16)(convert_ushort_sat(v.s0), convert_ushort_sat(v.s1), convert_ushort_sat(v.s2), convert_ushort_sat(v.s3), convert_ushort_sat(v.s4), convert_ushort_sat(v.s5), convert_ushort_sat(v.s6), convert_ushort_sat(v.s7), convert_ushort_sat(v.s8), convert_ushort_sat(v.s9), convert_ushort_sat(v.sA), convert_ushort_sat(v.sB), convert_ushort_sat(v.sC), convert_ushort_sat(v.sD), convert_ushort_sat(v.sE), convert_ushort_sat(v.sF)); } INLINE OVERLOADABLE char16 convert_char16_sat(float16 v) { return (char16)(convert_char_sat(v.s0), convert_char_sat(v.s1), convert_char_sat(v.s2), convert_char_sat(v.s3), convert_char_sat(v.s4), convert_char_sat(v.s5), convert_char_sat(v.s6), convert_char_sat(v.s7), convert_char_sat(v.s8), convert_char_sat(v.s9), convert_char_sat(v.sA), convert_char_sat(v.sB), convert_char_sat(v.sC), convert_char_sat(v.sD), convert_char_sat(v.sE), convert_char_sat(v.sF)); } INLINE OVERLOADABLE uchar16 convert_uchar16_sat(float16 v) { return (uchar16)(convert_uchar_sat(v.s0), convert_uchar_sat(v.s1), convert_uchar_sat(v.s2), convert_uchar_sat(v.s3), convert_uchar_sat(v.s4), convert_uchar_sat(v.s5), convert_uchar_sat(v.s6), convert_uchar_sat(v.s7), convert_uchar_sat(v.s8), convert_uchar_sat(v.s9), convert_uchar_sat(v.sA), convert_uchar_sat(v.sB), convert_uchar_sat(v.sC), convert_uchar_sat(v.sD), convert_uchar_sat(v.sE), convert_uchar_sat(v.sF)); } Release_v0.3/backend/src/ocl_stdlib.tmpl.h000066400000000000000000002552361223142177000206400ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __GEN_OCL_STDLIB_H__ #define __GEN_OCL_STDLIB_H__ #define INLINE inline __attribute__((always_inline)) #define OVERLOADABLE __attribute__((overloadable)) #define PURE __attribute__((pure)) #define CONST __attribute__((const)) #define INLINE_OVERLOADABLE inline __attribute__((overloadable,always_inline)) ///////////////////////////////////////////////////////////////////////////// // OpenCL built-in scalar data types ///////////////////////////////////////////////////////////////////////////// typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; typedef unsigned long ulong; typedef __typeof__(sizeof(int)) size_t; typedef __typeof__((int *)0-(int *)0) ptrdiff_t; typedef signed int intptr_t; typedef unsigned int uintptr_t; ///////////////////////////////////////////////////////////////////////////// // OpenCL address space ///////////////////////////////////////////////////////////////////////////// // These are built-ins in LLVM 3.3. 
#if 100*__clang_major__ + __clang_minor__ <= 302 #define __private __attribute__((address_space(0))) #define __global __attribute__((address_space(1))) #define __constant __attribute__((address_space(2))) #define __local __attribute__((address_space(3))) #define global __global #define local __local #define constant __constant #define private __private #endif #pragma OPENCL EXTENSION cl_khr_fp64 : enable ///////////////////////////////////////////////////////////////////////////// // OpenCL built-in vector data types ///////////////////////////////////////////////////////////////////////////// #define DEF(type) typedef type type##2 __attribute__((ext_vector_type(2)));\ typedef type type##3 __attribute__((ext_vector_type(3)));\ typedef type type##4 __attribute__((ext_vector_type(4)));\ typedef type type##8 __attribute__((ext_vector_type(8)));\ typedef type type##16 __attribute__((ext_vector_type(16))); DEF(char); DEF(uchar); DEF(short); DEF(ushort); DEF(int); DEF(uint); DEF(long); DEF(ulong); DEF(float); DEF(double); #undef DEF ///////////////////////////////////////////////////////////////////////////// // OpenCL other built-in data types ///////////////////////////////////////////////////////////////////////////// // FIXME: // This is a transitional hack to bypass the LLVM 3.3 built-in types. // See the Khronos SPIR specification for handling of these types. 
/* Image handles are pointers to opaque structs in address space 4; the
 * public OpenCL type names are macros over the double-underscore typedefs. */
#define __texture __attribute__((address_space(4)))
struct _image2d_t;
typedef __texture struct _image2d_t* __image2d_t;
struct _image3d_t;
typedef __texture struct _image3d_t* __image3d_t;
typedef const uint __sampler_t;
typedef size_t __event_t;
#define image2d_t __image2d_t
#define image3d_t __image3d_t
#define sampler_t __sampler_t
#define event_t __event_t
/////////////////////////////////////////////////////////////////////////////
// OpenCL conversions & type casting
/////////////////////////////////////////////////////////////////////////////

/* NOTE(review): the marker comments below look like anchors that a build step
 * fills with the as_* and convert_* helpers (as_int/as_float are used later in
 * this header but not defined in it) - confirm before editing them. */
// ##BEGIN_AS##

// ##END_AS##

// ##BEGIN_CONVERT##

// ##END_CONVERT##

/////////////////////////////////////////////////////////////////////////////
// OpenCL preprocessor directives & macros
/////////////////////////////////////////////////////////////////////////////
#define __OPENCL_VERSION__ 110
#define __CL_VERSION_1_0__ 100
#define __CL_VERSION_1_1__ 110
#define __ENDIAN_LITTLE__ 1
#define __IMAGE_SUPPORT__ 1
/* kernel_exec(X, TYPE): __kernel with a (X,1,1) work-group size hint and a
 * vec_type_hint of TYPE. */
#define __kernel_exec(X, TYPE) __kernel __attribute__((work_group_size_hint(X,1,1))) \
                               __attribute__((vec_type_hint(TYPE)))
#define kernel_exec(X, TYPE) __kernel_exec(X, TYPE)
/////////////////////////////////////////////////////////////////////////////
// OpenCL floating-point macros and pragmas
/////////////////////////////////////////////////////////////////////////////
#define FLT_DIG 6
#define FLT_MANT_DIG 24
#define FLT_MAX_10_EXP +38
#define FLT_MAX_EXP +128
#define FLT_MIN_10_EXP -37
#define FLT_MIN_EXP -125
#define FLT_RADIX 2
#define FLT_ONE 1.0000000000e+00 /* 0x3F800000 */
#define FLT_MAX 0x1.fffffep127f
#define FLT_MIN 0x1.0p-126f
#define FLT_EPSILON 0x1.0p-23f

#define MAXFLOAT 3.40282347e38F
#define HUGE_VALF (__builtin_huge_valf())
#define INFINITY (__builtin_inff())
#define NAN (__builtin_nanf(""))
/* Single-precision math constants (the _F suffix family). */
#define M_E_F 2.718281828459045F
#define M_LOG2E_F 1.4426950408889634F
#define M_LOG10E_F 0.43429448190325176F
#define M_LN2_F 0.6931471805599453F
#define M_LN10_F 2.302585092994046F
#define M_PI_F 3.141592653589793F
#define M_PI_2_F 1.5707963267948966F
#define M_PI_4_F 0.7853981633974483F
#define M_1_PI_F 0.3183098861837907F
#define M_2_PI_F 0.6366197723675814F
#define M_2_SQRTPI_F 1.1283791670955126F
#define M_SQRT2_F 1.4142135623730951F
#define M_SQRT1_2_F 0.7071067811865476F
/////////////////////////////////////////////////////////////////////////////
// OpenCL integer built-in macros
/////////////////////////////////////////////////////////////////////////////
#define CHAR_BIT 8
#define CHAR_MAX SCHAR_MAX
#define CHAR_MIN SCHAR_MIN
#define INT_MAX 2147483647
#define INT_MIN (-2147483647 - 1)
#define LONG_MAX 0x7fffffffffffffffL
#define LONG_MIN (-0x7fffffffffffffffL - 1)
#define SCHAR_MAX 127
#define SCHAR_MIN (-127 - 1)
#define SHRT_MAX 32767
#define SHRT_MIN (-32767 - 1)
#define UCHAR_MAX 255
#define USHRT_MAX 65535
#define UINT_MAX 0xffffffff
#define ULONG_MAX 0xffffffffffffffffUL
/////////////////////////////////////////////////////////////////////////////
// OpenCL relational built-in functions
/////////////////////////////////////////////////////////////////////////////

/* Scalar float comparisons; each returns the int value of the C comparison. */
int INLINE_OVERLOADABLE isequal(float x, float y) { return x == y; }
int INLINE_OVERLOADABLE isnotequal(float x, float y) { return x != y; }
int INLINE_OVERLOADABLE isgreater(float x, float y) { return x > y; }
int INLINE_OVERLOADABLE isgreaterequal(float x, float y) { return x >= y; }
int INLINE_OVERLOADABLE isless(float x, float y) { return x < y; }
int INLINE_OVERLOADABLE islessequal(float x, float y) { return x <= y; }
int INLINE_OVERLOADABLE islessgreater(float x, float y) { return (x < y) || (x > y); }

/* SDEF(TYPE): declare the signed saturating add/sub intrinsics for TYPE and
 * define add_sat / sub_sat as direct wrappers around them. */
#define SDEF(TYPE) \
OVERLOADABLE TYPE ocl_sadd_sat(TYPE x, TYPE y); \
OVERLOADABLE TYPE ocl_ssub_sat(TYPE x, TYPE y); \
INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); } \
INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); }
SDEF(char);
SDEF(short);
#undef SDEF
OVERLOADABLE int ocl_sadd_sat(int x, int y);
INLINE_OVERLOADABLE int add_sat(int x, int y) { return ocl_sadd_sat(x, y); } OVERLOADABLE int ocl_ssub_sat(int x, int y); INLINE_OVERLOADABLE int sub_sat(int x, int y) { return (y == 0x80000000u) ? (x & 0x7FFFFFFF) : ocl_ssub_sat(x, y); } OVERLOADABLE long ocl_sadd_sat(long x, long y); INLINE_OVERLOADABLE long add_sat(long x, long y) { union {long l; uint i[2];} ux, uy; ux.l = x; uy.l = y; if((ux.i[1] ^ uy.i[1]) & 0x80000000u) return x + y; return ocl_sadd_sat(x, y); } OVERLOADABLE long ocl_ssub_sat(long x, long y); INLINE_OVERLOADABLE long sub_sat(long x, long y) { union {long l; uint i[2];} ux, uy; ux.l = x; uy.l = y; if((ux.i[1] ^ uy.i[1]) & 0x80000000u) return ocl_ssub_sat(x, y); return x - y; } #define UDEF(TYPE) \ OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \ OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y); \ INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_uadd_sat(x, y); } \ INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_usub_sat(x, y); } UDEF(uchar); UDEF(ushort); UDEF(uint); UDEF(ulong); #undef UDEF INLINE_OVERLOADABLE int isfinite(float x) { return __builtin_isfinite(x); } INLINE_OVERLOADABLE int isinf(float x) { return __builtin_isinf(x); } INLINE_OVERLOADABLE int isnan(float x) { union { uint u; float f; } u; u.f = x; return (u.u & 0x7FFFFFFF) > 0x7F800000; } INLINE_OVERLOADABLE int isnormal(float x) { return __builtin_isnormal(x); } INLINE_OVERLOADABLE int isordered(float x, float y) { return isequal(x, x) && isequal(y, y); } INLINE_OVERLOADABLE int isunordered(float x, float y) { return isnan(x) || isnan(y); } INLINE_OVERLOADABLE int signbit(float x) { return __builtin_signbit(x); } #define DEC1(type) INLINE_OVERLOADABLE int any(type a) { return a<0; } #define DEC2(type) INLINE_OVERLOADABLE int any(type a) { return a.s0<0 || a.s1<0; } #define DEC3(type) INLINE_OVERLOADABLE int any(type a) { return a.s0<0 || a.s1<0 || a.s2<0; } #define DEC4(type) INLINE_OVERLOADABLE int any(type a) { return a.s0<0 || a.s1<0 || a.s2<0 || 
a.s3<0; } #define DEC8(type) INLINE_OVERLOADABLE int any(type a) { return a.s0<0 || a.s1<0 || a.s2<0 || a.s3<0 || a.s4<0 || a.s5<0 || a.s6<0 || a.s7<0; } #define DEC16(type) INLINE_OVERLOADABLE int any(type a) { return a.s0<0 || a.s1<0 || a.s2<0 || a.s3<0 || a.s4<0 || a.s5<0 || a.s6<0 || a.s7<0 || a.s8<0 || a.s9<0 || a.sA<0 || a.sB<0 || a.sC<0 || a.sD<0 || a.sE<0 || a.sF<0; } DEC1(char); DEC1(short); DEC1(int); DEC1(long); #define DEC(n) DEC##n(char##n); DEC##n(short##n); DEC##n(int##n); DEC##n(long##n); DEC(2); DEC(3); DEC(4); DEC(8); DEC(16); #undef DEC #undef DEC1 #undef DEC2 #undef DEC3 #undef DEC4 #undef DEC8 #undef DEC16 #define DEC1(type) INLINE_OVERLOADABLE int all(type a) { return a<0; } #define DEC2(type) INLINE_OVERLOADABLE int all(type a) { return a.s0<0 && a.s1<0; } #define DEC3(type) INLINE_OVERLOADABLE int all(type a) { return a.s0<0 && a.s1<0 && a.s2<0; } #define DEC4(type) INLINE_OVERLOADABLE int all(type a) { return a.s0<0 && a.s1<0 && a.s2<0 && a.s3<0; } #define DEC8(type) INLINE_OVERLOADABLE int all(type a) { return a.s0<0 && a.s1<0 && a.s2<0 && a.s3<0 && a.s4<0 && a.s5<0 && a.s6<0 && a.s7<0; } #define DEC16(type) INLINE_OVERLOADABLE int all(type a) { return a.s0<0 && a.s1<0 && a.s2<0 && a.s3<0 && a.s4<0 && a.s5<0 && a.s6<0 && a.s7<0 && a.s8<0 && a.s9<0 && a.sA<0 && a.sB<0 && a.sC<0 && a.sD<0 && a.sE<0 && a.sF<0; } DEC1(char); DEC1(short); DEC1(int); DEC1(long); #define DEC(n) DEC##n(char##n); DEC##n(short##n); DEC##n(int##n); DEC##n(long##n); DEC(2); DEC(3); DEC(4); DEC(8); DEC(16); #undef DEC #undef DEC1 #undef DEC2 #undef DEC3 #undef DEC4 #undef DEC8 #undef DEC16 #define DEF(type) INLINE_OVERLOADABLE type bitselect(type a, type b, type c) { return (a & ~c) | (b & c); } DEF(char); DEF(uchar); DEF(short); DEF(ushort); DEF(int); DEF(uint) DEF(long); DEF(ulong) #undef DEF INLINE_OVERLOADABLE float bitselect(float a, float b, float c) { return as_float(bitselect(as_int(a), as_int(b), as_int(c))); } 
///////////////////////////////////////////////////////////////////////////// // Integer built-in functions ///////////////////////////////////////////////////////////////////////////// PURE CONST uint __gen_ocl_fbh(uint); PURE CONST uint __gen_ocl_fbl(uint); INLINE_OVERLOADABLE char clz(char x) { if (x < 0) return 0; if (x == 0) return 8; return __gen_ocl_fbh(x) - 24; } INLINE_OVERLOADABLE uchar clz(uchar x) { if (x == 0) return 8; return __gen_ocl_fbh(x) - 24; } INLINE_OVERLOADABLE short clz(short x) { if (x < 0) return 0; if (x == 0) return 16; return __gen_ocl_fbh(x) - 16; } INLINE_OVERLOADABLE ushort clz(ushort x) { if (x == 0) return 16; return __gen_ocl_fbh(x) - 16; } INLINE_OVERLOADABLE int clz(int x) { if (x < 0) return 0; if (x == 0) return 32; return __gen_ocl_fbh(x); } INLINE_OVERLOADABLE uint clz(uint x) { if (x == 0) return 32; return __gen_ocl_fbh(x); } INLINE_OVERLOADABLE long clz(long x) { union { int i[2]; long x; } u; u.x = x; if (u.i[1] & 0x80000000u) return 0; if (u.i[1] == 0 && u.i[0] == 0) return 64; uint v = clz(u.i[1]); if(v == 32) v += clz(u.i[0]); return v; } INLINE_OVERLOADABLE ulong clz(ulong x) { if (x == 0) return 64; union { uint i[2]; ulong x; } u; u.x = x; uint v = clz(u.i[1]); if(v == 32) v += clz(u.i[0]); return v; } OVERLOADABLE int __gen_ocl_mul_hi(int x, int y); OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y); OVERLOADABLE long __gen_ocl_mul_hi(long x, long y); OVERLOADABLE ulong __gen_ocl_mul_hi(ulong x, ulong y); INLINE_OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; } INLINE_OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; } INLINE_OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; } INLINE_OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return (x * y) >> 16; } INLINE_OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); } INLINE_OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); } INLINE_OVERLOADABLE long mul_hi(long x, long y) { 
return __gen_ocl_mul_hi(x, y); } INLINE_OVERLOADABLE ulong mul_hi(ulong x, ulong y) { return __gen_ocl_mul_hi(x, y); } #define DEF(type) INLINE_OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; } DEF(char) DEF(uchar) DEF(short) DEF(ushort) DEF(int) DEF(uint) DEF(long) DEF(ulong) #undef DEF INLINE_OVERLOADABLE int mul24(int a, int b) { return ((a << 8) >> 8) * ((b << 8) >> 8); } INLINE_OVERLOADABLE uint mul24(uint a, uint b) { return (a & 0xFFFFFF) * (b & 0xFFFFFF); } INLINE_OVERLOADABLE int mad24(int a, int b, int c) { return mul24(a, b) + c; } INLINE_OVERLOADABLE uint mad24(uint a, uint b, uint c) { return mul24(a, b) + c; } INLINE_OVERLOADABLE char mad_sat(char a, char b, char c) { int x = (int)a * (int)b + (int)c; if (x > 127) x = 127; if (x < -128) x = -128; return x; } INLINE_OVERLOADABLE uchar mad_sat(uchar a, uchar b, uchar c) { uint x = (uint)a * (uint)b + (uint)c; if (x > 255) x = 255; return x; } INLINE_OVERLOADABLE short mad_sat(short a, short b, short c) { int x = (int)a * (int)b + (int)c; if (x > 32767) x = 32767; if (x < -32768) x = -32768; return x; } INLINE_OVERLOADABLE ushort mad_sat(ushort a, ushort b, ushort c) { uint x = (uint)a * (uint)b + (uint)c; if (x > 65535) x = 65535; return x; } INLINE_OVERLOADABLE int mad_sat(int a, int b, int c) { long x = (long)a * (long)b + (long)c; if (x > 0x7FFFFFFF) x = 0x7FFFFFFF; else if (x < -0x7FFFFFFF-1) x = -0x7FFFFFFF-1; return (int)x; } INLINE_OVERLOADABLE uint mad_sat(uint a, uint b, uint c) { ulong x = (ulong)a * (ulong)b + (ulong)c; if (x > 0xFFFFFFFFu) x = 0xFFFFFFFFu; return (uint)x; } OVERLOADABLE long __gen_ocl_mad_sat(long a, long b, long c); OVERLOADABLE ulong __gen_ocl_mad_sat(ulong a, ulong b, ulong c); INLINE_OVERLOADABLE long mad_sat(long a, long b, long c) { return __gen_ocl_mad_sat(a, b, c); } INLINE_OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c) { return __gen_ocl_mad_sat(a, b, c); } INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) 
| (x >> (8 - y)); } INLINE_OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); } INLINE_OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); } INLINE_OVERLOADABLE short __rotate_left(short x, short y) { return __rotate_left((ushort)x, (ushort)y); } INLINE_OVERLOADABLE uint __rotate_left(uint x, uint y) { return (x << y) | (x >> (32 - y)); } INLINE_OVERLOADABLE int __rotate_left(int x, int y) { return __rotate_left((uint)x, (uint)y); } INLINE_OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { return (x << y) | (x >> (64 - y)); } INLINE_OVERLOADABLE long __rotate_left(long x, long y) { return __rotate_left((ulong)x, (ulong)y); } #define DEF(type, m) INLINE_OVERLOADABLE type rotate(type x, type y) { return __rotate_left(x, (type)(y & m)); } DEF(char, 7) DEF(uchar, 7) DEF(short, 15) DEF(ushort, 15) DEF(int, 31) DEF(uint, 31) DEF(long, 63) DEF(ulong, 63) #undef DEF OVERLOADABLE short __gen_ocl_upsample(short hi, short lo); OVERLOADABLE int __gen_ocl_upsample(int hi, int lo); OVERLOADABLE long __gen_ocl_upsample(long hi, long lo); INLINE_OVERLOADABLE short upsample(char hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); } INLINE_OVERLOADABLE ushort upsample(uchar hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); } INLINE_OVERLOADABLE int upsample(short hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); } INLINE_OVERLOADABLE uint upsample(ushort hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); } INLINE_OVERLOADABLE long upsample(int hi, uint lo) { return __gen_ocl_upsample((long)hi, (long)lo); } INLINE_OVERLOADABLE ulong upsample(uint hi, uint lo) { return __gen_ocl_upsample((long)hi, (long)lo); } OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y); OVERLOADABLE uint __gen_ocl_rhadd(uint x, uint y); #define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort) #define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; } 
DEC #undef DEF #define DEF(type) INLINE_OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >> 1; } DEC #undef DEF #undef DEC INLINE_OVERLOADABLE int hadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : __gen_ocl_hadd((uint)x, (uint)y); } INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); } INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd((uint)x, (uint)y); } INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); } OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y); OVERLOADABLE ulong __gen_ocl_rhadd(ulong x, ulong y); INLINE_OVERLOADABLE long hadd(long x, long y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : __gen_ocl_hadd((ulong)x, (ulong)y); } INLINE_OVERLOADABLE ulong hadd(ulong x, ulong y) { return __gen_ocl_hadd(x, y); } INLINE_OVERLOADABLE long rhadd(long x, long y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd((ulong)x, (ulong)y); } INLINE_OVERLOADABLE ulong rhadd(ulong x, ulong y) { return __gen_ocl_rhadd(x, y); } int __gen_ocl_abs(int x); #define DEC(TYPE) INLINE_OVERLOADABLE u##TYPE abs(TYPE x) { return (u##TYPE) __gen_ocl_abs(x); } DEC(int) DEC(short) DEC(char) #undef DEC INLINE_OVERLOADABLE ulong abs(long x) { return x < 0 ? -x : x; } /* For unsigned types, do nothing. */ #define DEC(TYPE) INLINE_OVERLOADABLE TYPE abs(TYPE x) { return x; } DEC(uint) DEC(ushort) DEC(uchar) DEC(ulong) #undef DEC /* Char and short type abs diff */ /* promote char and short to int and will be no module overflow */ #define DEC(TYPE, UTYPE) INLINE_OVERLOADABLE UTYPE abs_diff(TYPE x, TYPE y) \ { return (UTYPE) (abs((int)x - (int)y)); } DEC(char, uchar) DEC(uchar, uchar) DEC(short, ushort) DEC(ushort, ushort) #undef DEC INLINE_OVERLOADABLE uint abs_diff (uint x, uint y) { /* same signed will never overflow. */ return y > x ? 
(y -x) : (x - y); } INLINE_OVERLOADABLE uint abs_diff (int x, int y) { /* same signed will never module overflow. */ if ((x >= 0 && y >= 0) || (x <= 0 && y <= 0)) return abs(x - y); return (abs(x) + abs(y)); } INLINE_OVERLOADABLE ulong abs_diff (long x, long y) { if ((x >= 0 && y >= 0) || (x <= 0 && y <= 0)) return abs(x - y); return abs(x) + abs(y); } INLINE_OVERLOADABLE ulong abs_diff (ulong x, ulong y) { return y > x ? (y - x) : (x - y); } ///////////////////////////////////////////////////////////////////////////// // Work Items functions (see 6.11.1 of OCL 1.1 spec) ///////////////////////////////////////////////////////////////////////////// PURE CONST uint __gen_ocl_get_work_dim(void); INLINE uint get_work_dim(void) { return __gen_ocl_get_work_dim(); } #define DECL_INTERNAL_WORK_ITEM_FN(NAME) \ PURE CONST unsigned int __gen_ocl_##NAME##0(void); \ PURE CONST unsigned int __gen_ocl_##NAME##1(void); \ PURE CONST unsigned int __gen_ocl_##NAME##2(void); DECL_INTERNAL_WORK_ITEM_FN(get_group_id) DECL_INTERNAL_WORK_ITEM_FN(get_local_id) DECL_INTERNAL_WORK_ITEM_FN(get_local_size) DECL_INTERNAL_WORK_ITEM_FN(get_global_size) DECL_INTERNAL_WORK_ITEM_FN(get_global_offset) DECL_INTERNAL_WORK_ITEM_FN(get_num_groups) #undef DECL_INTERNAL_WORK_ITEM_FN #define DECL_PUBLIC_WORK_ITEM_FN(NAME, OTHER_RET) \ INLINE unsigned NAME(unsigned int dim) { \ if (dim == 0) return __gen_ocl_##NAME##0(); \ else if (dim == 1) return __gen_ocl_##NAME##1(); \ else if (dim == 2) return __gen_ocl_##NAME##2(); \ else return OTHER_RET; \ } DECL_PUBLIC_WORK_ITEM_FN(get_group_id, 0) DECL_PUBLIC_WORK_ITEM_FN(get_local_id, 0) DECL_PUBLIC_WORK_ITEM_FN(get_local_size, 1) DECL_PUBLIC_WORK_ITEM_FN(get_global_size, 1) DECL_PUBLIC_WORK_ITEM_FN(get_global_offset, 0) DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1) #undef DECL_PUBLIC_WORK_ITEM_FN INLINE uint get_global_id(uint dim) { return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) + get_global_offset(dim); } 
///////////////////////////////////////////////////////////////////////////// // Math Functions (see 6.11.2 of OCL 1.1 spec) ///////////////////////////////////////////////////////////////////////////// PURE CONST float __gen_ocl_fabs(float x); PURE CONST float __gen_ocl_sin(float x); PURE CONST float __gen_ocl_cos(float x); PURE CONST float __gen_ocl_sqrt(float x); PURE CONST float __gen_ocl_rsqrt(float x); PURE CONST float __gen_ocl_log(float x); PURE CONST float __gen_ocl_pow(float x, float y); PURE CONST float __gen_ocl_rcp(float x); PURE CONST float __gen_ocl_rndz(float x); PURE CONST float __gen_ocl_rnde(float x); PURE CONST float __gen_ocl_rndu(float x); PURE CONST float __gen_ocl_rndd(float x); INLINE_OVERLOADABLE float hypot(float x, float y) { return __gen_ocl_sqrt(x*x + y*y); } INLINE_OVERLOADABLE float native_cos(float x) { return __gen_ocl_cos(x); } INLINE_OVERLOADABLE float __gen_ocl_internal_cospi(float x) { return __gen_ocl_cos(x * M_PI_F); } INLINE_OVERLOADABLE float native_sin(float x) { return __gen_ocl_sin(x); } INLINE_OVERLOADABLE float __gen_ocl_internal_sinpi(float x) { /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ float y, z; int n, ix; ix = *(int *) (&x) & 0x7fffffff; if (ix < 0x3e800000) return __gen_ocl_sin(M_PI_F * x); y = -x; z = __gen_ocl_rndd(y); if (z != y) { y *= 0.5f; y = 2.f * (y - __gen_ocl_rndd(y)); n = y * 4.f; } else { if (ix >= 0x4b800000) { y = 0; n = 0; } else { if (ix < 0x4b000000) z = y + 8.3886080000e+06f; int n = *(int *) (&z); n &= 1; y = n; n <<= 2; } } switch (n) { case 0: y = __gen_ocl_sin(M_PI_F * y); break; case 1: case 2: y = __gen_ocl_cos(M_PI_F * (0.5f - y)); break; case 3: case 4: y = __gen_ocl_sin(M_PI_F * (1.f - y)); break; case 5: case 6: y = -__gen_ocl_cos(M_PI_F * (y - 1.5f)); break; default: y = __gen_ocl_sin(M_PI_F * (y - 2.f)); break; } return -y; } INLINE_OVERLOADABLE float native_sqrt(float x) { return __gen_ocl_sqrt(x); } INLINE_OVERLOADABLE float native_rsqrt(float x) { return __gen_ocl_rsqrt(x); } INLINE_OVERLOADABLE float native_log2(float x) { return __gen_ocl_log(x); } INLINE_OVERLOADABLE float native_log(float x) { return native_log2(x) * 0.6931472002f; } INLINE_OVERLOADABLE float tgamma(float x) { /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ float pi = 3.1415927410e+00, a0 = 7.7215664089e-02, a1 = 3.2246702909e-01, a2 = 6.7352302372e-02, a3 = 2.0580807701e-02, a4 = 7.3855509982e-03, a5 = 2.8905137442e-03, a6 = 1.1927076848e-03, a7 = 5.1006977446e-04, a8 = 2.2086278477e-04, a9 = 1.0801156895e-04, a10 = 2.5214456400e-05, a11 = 4.4864096708e-05, tc = 1.4616321325e+00, tf = -1.2148628384e-01, tt = 6.6971006518e-09, t0 = 4.8383611441e-01, t1 = -1.4758771658e-01, t2 = 6.4624942839e-02, t3 = -3.2788541168e-02, t4 = 1.7970675603e-02, t5 = -1.0314224288e-02, t6 = 6.1005386524e-03, t7 = -3.6845202558e-03, t8 = 2.2596477065e-03, t9 = -1.4034647029e-03, t10 = 8.8108185446e-04, t11 = -5.3859531181e-04, t12 = 3.1563205994e-04, t13 = -3.1275415677e-04, t14 = 3.3552918467e-04, u0 = -7.7215664089e-02, u1 = 6.3282704353e-01, u2 = 1.4549225569e+00, u3 = 9.7771751881e-01, u4 = 2.2896373272e-01, u5 = 1.3381091878e-02, v1 = 2.4559779167e+00, v2 = 2.1284897327e+00, v3 = 7.6928514242e-01, v4 = 1.0422264785e-01, v5 = 3.2170924824e-03, s0 = -7.7215664089e-02, s1 = 2.1498242021e-01, s2 = 3.2577878237e-01, s3 = 1.4635047317e-01, s4 = 2.6642270386e-02, s5 = 1.8402845599e-03, s6 = 3.1947532989e-05, r1 = 1.3920053244e+00, r2 = 7.2193557024e-01, r3 = 1.7193385959e-01, r4 = 1.8645919859e-02, r5 = 7.7794247773e-04, r6 = 7.3266842264e-06, w0 = 4.1893854737e-01, w1 = 8.3333335817e-02, w2 = -2.7777778450e-03, w3 = 7.9365057172e-04, w4 = -5.9518753551e-04, w5 = 8.3633989561e-04, w6 = -1.6309292987e-03; float t, y, z, nadj, p, p1, p2, p3, q, r, w; int i, hx, ix; nadj = 0; hx = *(int *) (&x); ix = hx & 0x7fffffff; if (ix >= 0x7f800000) return x * x; if (ix == 0) return INFINITY; if (ix < 0x1c800000) { if (hx < 0) { return - native_log(-x); } else return - native_log(x); } if (hx < 0) { if (ix >= 0x4b000000) return INFINITY; t = __gen_ocl_internal_sinpi(x); if (__gen_ocl_fabs(t) < 1e-8f) return INFINITY; nadj = native_log(M_PI_F / __gen_ocl_fabs(t * x)); x = -x; } if (ix == 0x3f800000 
|| ix == 0x40000000) r = 0; else if (ix < 0x40000000) { if (ix <= 0x3f666666) { r = - native_log(x); if (ix >= 0x3f3b4a20) { y = 1 - x; i = 0; } else if (ix >= 0x3e6d3308) { y = x - (tc - 1); i = 1; } else { y = x; i = 2; } } else { r = 0; if (ix >= 0x3fdda618) { y = 2 - x; i = 0; } else if (ix >= 0x3F9da620) { y = x - tc; i = 1; } else { y = x - 1; i = 2; } } switch (i) { case 0: z = y * y; p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10)))); p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11))))); p = y * p1 + p2; r += (p - .5f * y); break; case 1: z = y * y; w = z * y; p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13))); p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14))); p = z * p1 - (tt - w * (p2 + y * p3)); r += (tf + p); break; case 2: p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5))))); p2 = 1 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5)))); r += (-.5f * y + p1 / p2); } } else if (ix < 0x41000000) { i = x; t = 0; y = x - i; p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); q = 1 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6))))); r = .5f * y + p / q; z = 1; switch (i) { case 7: z *= (y + 6.f); case 6: z *= (y + 5.f); case 5: z *= (y + 4.f); case 4: z *= (y + 3.f); case 3: z *= (y + 2.f); r += native_log(z); break; } } else if (ix < 0x5c800000) { t = native_log(x); z = 1 / x; y = z * z; w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6))))); r = (x - .5f) * (t - 1) + w; } else r = x * (native_log(x) - 1); if (hx < 0) r = nadj - r; return r; } INLINE_OVERLOADABLE float lgamma(float x) { /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ const float zero= 0., one = 1.0000000000e+00, pi = 3.1415927410e+00, a0 = 7.7215664089e-02, a1 = 3.2246702909e-01, a2 = 6.7352302372e-02, a3 = 2.0580807701e-02, a4 = 7.3855509982e-03, a5 = 2.8905137442e-03, a6 = 1.1927076848e-03, a7 = 5.1006977446e-04, a8 = 2.2086278477e-04, a9 = 1.0801156895e-04, a10 = 2.5214456400e-05, a11 = 4.4864096708e-05, tc = 1.4616321325e+00, tf = -1.2148628384e-01, tt = 6.6971006518e-09, t0 = 4.8383611441e-01, t1 = -1.4758771658e-01, t2 = 6.4624942839e-02, t3 = -3.2788541168e-02, t4 = 1.7970675603e-02, t5 = -1.0314224288e-02, t6 = 6.1005386524e-03, t7 = -3.6845202558e-03, t8 = 2.2596477065e-03, t9 = -1.4034647029e-03, t10 = 8.8108185446e-04, t11 = -5.3859531181e-04, t12 = 3.1563205994e-04, t13 = -3.1275415677e-04, t14 = 3.3552918467e-04, u0 = -7.7215664089e-02, u1 = 6.3282704353e-01, u2 = 1.4549225569e+00, u3 = 9.7771751881e-01, u4 = 2.2896373272e-01, u5 = 1.3381091878e-02, v1 = 2.4559779167e+00, v2 = 2.1284897327e+00, v3 = 7.6928514242e-01, v4 = 1.0422264785e-01, v5 = 3.2170924824e-03, s0 = -7.7215664089e-02, s1 = 2.1498242021e-01, s2 = 3.2577878237e-01, s3 = 1.4635047317e-01, s4 = 2.6642270386e-02, s5 = 1.8402845599e-03, s6 = 3.1947532989e-05, r1 = 1.3920053244e+00, r2 = 7.2193557024e-01, r3 = 1.7193385959e-01, r4 = 1.8645919859e-02, r5 = 7.7794247773e-04, r6 = 7.3266842264e-06, w0 = 4.1893854737e-01, w1 = 8.3333335817e-02, w2 = -2.7777778450e-03, w3 = 7.9365057172e-04, w4 = -5.9518753551e-04, w5 = 8.3633989561e-04, w6 = -1.6309292987e-03; float t, y, z, nadj, p, p1, p2, p3, q, r, w; int i, hx, ix; nadj = 0; hx = *(int *)&x; ix = hx & 0x7fffffff; if (ix >= 0x7f800000) return x * x; if (ix == 0) return ((x + one) / zero); if (ix < 0x1c800000) { if (hx < 0) { return -native_log(-x); } else return -native_log(x); } if (hx < 0) { if (ix >= 0x4b000000) return ((-x) / zero); t = __gen_ocl_internal_sinpi(x); if (t == zero) return ((-x) / zero); nadj = native_log(pi / __gen_ocl_fabs(t * 
x)); x = -x; } if (ix == 0x3f800000 || ix == 0x40000000) r = 0; else if (ix < 0x40000000) { if (ix <= 0x3f666666) { r = -native_log(x); if (ix >= 0x3f3b4a20) { y = one - x; i = 0; } else if (ix >= 0x3e6d3308) { y = x - (tc - one); i = 1; } else { y = x; i = 2; } } else { r = zero; if (ix >= 0x3fdda618) { y = (float) 2.0 - x; i = 0; } else if (ix >= 0x3F9da620) { y = x - tc; i = 1; } else { y = x - one; i = 2; } } switch (i) { case 0: z = y * y; p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10)))); p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11))))); p = y * p1 + p2; r += (p - (float) 0.5 * y); break; case 1: z = y * y; w = z * y; p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13))); p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14))); p = z * p1 - (tt - w * (p2 + y * p3)); r += (tf + p); break; case 2: p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5))))); p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5)))); r += (-(float) 0.5 * y + p1 / p2); } } else if (ix < 0x41000000) { i = (int) x; t = zero; y = x - (float) i; p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6)))))); q = one + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6))))); r = .5f * y + p / q; z = one; switch (i) { case 7: z *= (y + (float) 6.0); case 6: z *= (y + (float) 5.0); case 5: z *= (y + (float) 4.0); case 4: z *= (y + (float) 3.0); case 3: z *= (y + (float) 2.0); r += native_log(z); break; } } else if (ix < 0x5c800000) { t = native_log(x); z = one / x; y = z * z; w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6))))); r = (x - .5f) * (t - one) + w; } else r = x * (native_log(x) - one); if (hx < 0) r = nadj - r; return r; } /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. 
* Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ #define BODY \ const float \ zero= 0., \ one = 1.0000000000e+00, \ pi = 3.1415927410e+00, \ a0 = 7.7215664089e-02, \ a1 = 3.2246702909e-01, \ a2 = 6.7352302372e-02, \ a3 = 2.0580807701e-02, \ a4 = 7.3855509982e-03, \ a5 = 2.8905137442e-03, \ a6 = 1.1927076848e-03, \ a7 = 5.1006977446e-04, \ a8 = 2.2086278477e-04, \ a9 = 1.0801156895e-04, \ a10 = 2.5214456400e-05, \ a11 = 4.4864096708e-05, \ tc = 1.4616321325e+00, \ tf = -1.2148628384e-01, \ tt = 6.6971006518e-09, \ t0 = 4.8383611441e-01, \ t1 = -1.4758771658e-01, \ t2 = 6.4624942839e-02, \ t3 = -3.2788541168e-02, \ t4 = 1.7970675603e-02, \ t5 = -1.0314224288e-02, \ t6 = 6.1005386524e-03, \ t7 = -3.6845202558e-03, \ t8 = 2.2596477065e-03, \ t9 = -1.4034647029e-03, \ t10 = 8.8108185446e-04, \ t11 = -5.3859531181e-04, \ t12 = 3.1563205994e-04, \ t13 = -3.1275415677e-04, \ t14 = 3.3552918467e-04, \ u0 = -7.7215664089e-02, \ u1 = 6.3282704353e-01, \ u2 = 1.4549225569e+00, \ u3 = 9.7771751881e-01, \ u4 = 2.2896373272e-01, \ u5 = 1.3381091878e-02, \ v1 = 2.4559779167e+00, \ v2 = 2.1284897327e+00, \ v3 = 7.6928514242e-01, \ v4 = 1.0422264785e-01, \ v5 = 3.2170924824e-03, \ s0 = -7.7215664089e-02, \ s1 = 2.1498242021e-01, \ s2 = 3.2577878237e-01, \ s3 = 1.4635047317e-01, \ s4 = 2.6642270386e-02, \ s5 = 1.8402845599e-03, \ s6 = 3.1947532989e-05, \ r1 = 1.3920053244e+00, \ r2 = 7.2193557024e-01, \ r3 = 1.7193385959e-01, \ r4 = 1.8645919859e-02, \ r5 = 7.7794247773e-04, \ r6 = 7.3266842264e-06, \ w0 = 4.1893854737e-01, \ w1 = 8.3333335817e-02, \ w2 = -2.7777778450e-03, \ w3 = 7.9365057172e-04, \ w4 = -5.9518753551e-04, \ w5 = 8.3633989561e-04, \ w6 = -1.6309292987e-03; \ float t, y, z, nadj, p, p1, p2, p3, q, r, w; \ int i, hx, ix; \ nadj = 0; \ hx = *(int *)&x; \ *signgamp = 1; \ ix = hx & 0x7fffffff; \ if (ix >= 0x7f800000) \ return x * x; \ 
if (ix == 0) \ return ((x + one) / zero); \ if (ix < 0x1c800000) { \ if (hx < 0) { \ *signgamp = -1; \ return -native_log(-x); \ } else \ return -native_log(x); \ } \ if (hx < 0) { \ if (ix >= 0x4b000000) \ return ((-x) / zero); \ t = __gen_ocl_internal_sinpi(x); \ if (t == zero) \ return ((-x) / zero); \ nadj = native_log(pi / __gen_ocl_fabs(t * x)); \ if (t < zero) \ *signgamp = -1; \ x = -x; \ } \ if (ix == 0x3f800000 || ix == 0x40000000) \ r = 0; \ else if (ix < 0x40000000) { \ if (ix <= 0x3f666666) { \ r = -native_log(x); \ if (ix >= 0x3f3b4a20) { \ y = one - x; \ i = 0; \ } else if (ix >= 0x3e6d3308) { \ y = x - (tc - one); \ i = 1; \ } else { \ y = x; \ i = 2; \ } \ } else { \ r = zero; \ if (ix >= 0x3fdda618) { \ y = (float) 2.0 - x; \ i = 0; \ } \ else if (ix >= 0x3F9da620) { \ y = x - tc; \ i = 1; \ } \ else { \ y = x - one; \ i = 2; \ } \ } \ switch (i) { \ case 0: \ z = y * y; \ p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10)))); \ p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11))))); \ p = y * p1 + p2; \ r += (p - (float) 0.5 * y); \ break; \ case 1: \ z = y * y; \ w = z * y; \ p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); \ p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13))); \ p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14))); \ p = z * p1 - (tt - w * (p2 + y * p3)); \ r += (tf + p); \ break; \ case 2: \ p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5))))); \ p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5)))); \ r += (-(float) 0.5 * y + p1 / p2); \ } \ } else if (ix < 0x41000000) { \ i = (int) x; \ t = zero; \ y = x - (float) i; \ p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6)))))); \ q = one + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6))))); \ r = .5f * y + p / q; \ z = one; \ switch (i) { \ case 7: \ z *= (y + (float) 6.0); \ case 6: \ z *= (y + (float) 5.0); \ case 5: \ z *= (y + (float) 4.0); \ case 4: \ z *= (y + (float) 3.0); \ case 3: \ z *= (y + 
(float) 2.0); \
      r += native_log(z); \
      break; \
    } \
    \
  } else if (ix < 0x5c800000) { \
    t = native_log(x); \
    z = one / x; \
    y = z * z; \
    w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6))))); \
    r = (x - .5f) * (t - one) + w; \
  } else \
    r = x * (native_log(x) - one); \
  if (hx < 0) \
    r = nadj - r; \
  return r;
INLINE_OVERLOADABLE float lgamma_r(float x, global int *signgamp) { BODY; }
INLINE_OVERLOADABLE float lgamma_r(float x, local int *signgamp) { BODY; }
INLINE_OVERLOADABLE float lgamma_r(float x, private int *signgamp) { BODY; }
#undef BODY

/* Logarithm helpers built on the native log2/log instructions. */
/* 0.3010299956f is log10(2), so log2(x) * log10(2) == log10(x). */
INLINE_OVERLOADABLE float native_log10(float x) {
  return native_log2(x) * 0.3010299956f;
}
INLINE_OVERLOADABLE float log1p(float x) {
  return native_log(x + 1);
}
INLINE_OVERLOADABLE float logb(float x) {
  return __gen_ocl_rndd(native_log2(x));
}
INLINE_OVERLOADABLE int ilogb(float x) {
  return __gen_ocl_rndd(native_log2(x));
}
/* The payload 'code' is ignored; NAN is always returned. */
INLINE_OVERLOADABLE float nan(uint code) {
  return NAN;
}

/* Thin wrappers around the __gen_ocl_pow / __gen_ocl_rcp builtins. */
INLINE_OVERLOADABLE float native_powr(float x, float y) { return __gen_ocl_pow(x,y); }
INLINE_OVERLOADABLE float native_recip(float x) { return __gen_ocl_rcp(x); }
INLINE_OVERLOADABLE float native_tan(float x) {
  return native_sin(x) / native_cos(x);
}
INLINE_OVERLOADABLE float __gen_ocl_internal_tanpi(float x) {
  return native_tan(x * M_PI_F);
}
INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_pow(M_E_F, x); }
INLINE_OVERLOADABLE float native_exp2(float x) { return __gen_ocl_pow(2, x); }
INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_expm1(float x) { return __gen_ocl_pow(M_E_F, x) - 1; }
/* Cube root. A cube root is defined for negative inputs, but a power with a
 * fractional exponent is not, so the sign is peeled off and restored. */
INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) {
  if (x < 0)
    return -__gen_ocl_pow(-x, 0.3333333333f);
  return __gen_ocl_pow(x, 0.3333333333f);
}

/* sincos: returns sin(x) and stores cos(x) through cosval. One overload per
 * address space of the output pointer. */
#define BODY \
  *cosval = native_cos(x); \
  return native_sin(x);
INLINE_OVERLOADABLE float sincos(float x, global float *cosval) { BODY; }
INLINE_OVERLOADABLE float sincos(float x, local float *cosval) { BODY; }
INLINE_OVERLOADABLE float sincos(float x, private float *cosval) { BODY; }
#undef BODY

/* Hyperbolic functions expressed with native_exp:
 *   sinh(x) = (1 - e^-2x) / (2 e^-x),  cosh(x) = (1 + e^-2x) / (2 e^-x),
 *   tanh(x) = (1 - e^-2x) / (1 + e^-2x). */
INLINE_OVERLOADABLE float __gen_ocl_internal_sinh(float x) {
  return (1 - native_exp(-2 * x)) / (2 * native_exp(-x));
}
INLINE_OVERLOADABLE float __gen_ocl_internal_cosh(float x) {
  return (1 + native_exp(-2 * x)) / (2 * native_exp(-x));
}
INLINE_OVERLOADABLE float __gen_ocl_internal_tanh(float x) {
  float y = native_exp(-2 * x);
  return (1 - y) / (1 + y);
}

/* Union used to reinterpret a float as its 32-bit pattern and back. */
typedef union
{
  float value;
  int word;
} ieee_float_shape_type;

#ifndef GET_FLOAT_WORD
#define GET_FLOAT_WORD(i,d) \
do { \
  ieee_float_shape_type gf_u; \
  gf_u.value = (d); \
  (i) = gf_u.word; \
} while (0)
#endif

/* asin via its Maclaurin series (29 terms after the leading x), with explicit
 * handling of |x| == 1, |x| > 1 and tiny arguments. */
INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) {
  int hx, ix;
  GET_FLOAT_WORD(hx,x);
  ix = hx&0x7fffffff;
  if(ix == 0x3f800000) {
    return x * M_PI_2_F;  /* asin(|1|)=+-pi/2 with inexact */
  }
  if(ix > 0x3f800000) {            /* |x| > 1 */
    return (x-x) / (x-x);          /* asin(|x|>1) is NaN */
  }
  if(ix < 0x32000000) {            /* if |x| < 2**-27 */
    if(HUGE_VALF + x > FLT_ONE) return x;   /* return x with inexact if x!=0*/
  }
  /* 1 > |x| >= 2**-27 */
  float sum = x, c = x, m = 1.0;
  int n = 1;
  do
  {
    c *= (2 * n - 1) * x * x;      /* running numerator 1*3*5*... * x^(2n+1) */
    m *= (2 * n);                  /* running denominator 2*4*6*... */
    sum += ( c / m / (2 * n + 1));
    n++;
  }while( n < 30);
  return sum;
}
INLINE_OVERLOADABLE float __gen_ocl_internal_asinpi(float x) {
  return __gen_ocl_internal_asin(x) / M_PI_F;
}
INLINE_OVERLOADABLE float __gen_ocl_internal_acos(float x) {
  return M_PI_2_F - __gen_ocl_internal_asin(x);
}
INLINE_OVERLOADABLE float __gen_ocl_internal_acospi(float x) {
  return __gen_ocl_internal_acos(x) / M_PI_F;
}

/* atan via the alternating power series. For |x| >= 1 the identity
 * atan(x) = +-pi/2 - atan(1/x) is applied first (c = -1 flips the series). */
INLINE_OVERLOADABLE float __gen_ocl_internal_atan(float x) {
  float a = 0, c = 1;
  if (x <= -1) {
    a = - M_PI_2_F;
    x = 1 / x;
    c = -1;
  }
  if (x >= 1) {
    a = M_PI_2_F;
    x = 1 / x;
    c = -1;
  }
  a += c*x;
  int i;
  int sign;
  for(i=3, sign=-1; i<63; i+=2, sign=-sign) {
    a += c*sign*__gen_ocl_pow(x,i)/i;
  }
  return a;
}
INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) {
  return __gen_ocl_internal_atan(x) / M_PI_F;
}

/* Inverse hyperbolic functions in closed logarithmic form. */
INLINE_OVERLOADABLE float __gen_ocl_internal_asinh(float x) {
  return native_log(x + native_sqrt(x * x + 1));
}
INLINE_OVERLOADABLE float __gen_ocl_internal_acosh(float x) {
  return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1));
}
/* atanh(x) = 0.5 * ln((1 + x) / (1 - x)). The logarithm is required here;
 * a square root yields a different function altogether. */
INLINE_OVERLOADABLE float __gen_ocl_internal_atanh(float x) {
  return 0.5f * native_log((1 + x) / (1 - x));
}
/* Returns x with the sign bit of y. Works on the bit pattern so that signed
 * zeros (and products that would underflow to zero) are handled. */
INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y) {
  ieee_float_shape_type ux, uy;
  ux.value = x;
  uy.value = y;
  ux.word = (ux.word & 0x7fffffff) | (uy.word & 0x80000000);
  return ux.value;
}
/* erf via the first five terms of its Maclaurin series. */
INLINE_OVERLOADABLE float __gen_ocl_internal_erf(float x) {
  return M_2_SQRTPI_F * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / 10 - __gen_ocl_pow(x, 7) / 42 + __gen_ocl_pow(x, 9) / 216);
}
INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) {
  return 1 - __gen_ocl_internal_erf(x);
}

// XXX work-around PTX profile
#define sqrt native_sqrt
INLINE_OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }

/* atan2(y, x): NaN if either input is NaN; +-pi/2 on the y axis; otherwise
 * atan(y / x) shifted by +-pi when x is negative.
 * NOTE(review): y == 0 with x == 0 returns NaN here; atan2pi below special-cases
 * zeros before delegating to this function. */
INLINE_OVERLOADABLE float __gen_ocl_internal_atan2(float y, float x) {
  uint hx = *(uint *)(&x), ix = hx & 0x7FFFFFFF;
  uint hy = *(uint *)(&y), iy = hy & 0x7FFFFFFF;
  if (ix > 0x7F800000 || iy > 0x7F800000)
    return nan(0u);
  if (ix == 0) {
    if (y > 0)
      return M_PI_2_F;
    if (y < 0)
      return - M_PI_2_F;
    return nan(0u);
  } else {
    float z = __gen_ocl_internal_atan(y / x);
    if (x > 0)
      return z;
    if (y >= 0)
      return M_PI_F + z;
    return - M_PI_F + z;
  }
}
/* atan2(y, x) / pi with explicit results for signed zeros and infinities. */
INLINE_OVERLOADABLE float __gen_ocl_internal_atan2pi(float y, float x) {
  uint ix = as_uint(x), iy = as_uint(y),
       pos_zero = 0, neg_zero = 0x80000000u,
       pos_inf = 0x7f800000, neg_inf = 0xff800000u;
  if(iy == pos_zero) {
    if(ix == pos_zero)
      return 0;
    if(ix == neg_zero)
      return 1;
    if(x < 0)
      return 1;
    if(x > 0)
      return 0;
  }
  if(iy == neg_zero) {
    if(ix == pos_zero)
      return -0.f;
    if(ix == neg_zero)
      return -1;
    if(x < 0)
      return -1;
    if(x > 0)
      return -0.f;
  }
  if((ix & 0x7fffffff) == 0) {
    if(y < 0)
      return -.5f;
    if(y > 0)
      return .5f;
  }
  if(ix == pos_inf) {
    if(y > 0 && iy != pos_inf)
      return 0;
    if(y < 0 && iy != neg_inf)
      return -0.f;
  }
  if(ix == neg_inf) {
    if(y > 0 && iy != pos_inf)
      return 1;
    if(y < 0 && iy != neg_inf)
      return -1;
  }
  if(iy == pos_inf) {
    if(ix == pos_inf)
      return 0.25f;
    if(ix == neg_inf)
      return 0.75f;
    if(x >= 0 || x <= 0)   /* any non-NaN x */
      return 0.5f;
  }
  if(iy == neg_inf) {
    if(ix == pos_inf)
      return -0.25f;
    if(ix == neg_inf)
      return -0.75f;
    if(x >= 0 || x <= 0)   /* any non-NaN x */
      return -0.5f;
  }
  return __gen_ocl_internal_atan2(y, x) / M_PI_F;
}

/* Rounding and elementary wrappers. */
INLINE_OVERLOADABLE float __gen_ocl_internal_fabs(float x)  { return __gen_ocl_fabs(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); }
/* round(): nearest integer with halfway cases rounded away from zero. Truncate
 * first, then step one unit away from zero if at least 0.5 was dropped. */
INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) {
  float y = __gen_ocl_rndz(x);
  if (__gen_ocl_fabs(x - y) >= 0.5f)
    y += (x < 0) ? -1.f : 1.f;
  return y;
}
INLINE_OVERLOADABLE float __gen_ocl_internal_floor(float x) { return __gen_ocl_rndd(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_ceil(float x)  { return __gen_ocl_rndu(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_log(float x)   { return native_log(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_log2(float x)  { return native_log2(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_log10(float x) { return native_log10(x); }
INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x)   { return native_exp(x); }
INLINE_OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }
/* fmod truncates the quotient; remainder rounds it to nearest-even. */
INLINE_OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); }
INLINE_OVERLOADABLE float remainder(float x, float y) { return x-y*__gen_ocl_rnde(x/y); }
/* rint(): round to nearest integer, halfway cases to even. */
INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
  return __gen_ocl_rnde(x);
}

// TODO use llvm intrinsics definitions
#define cos native_cos
#define cospi __gen_ocl_internal_cospi
#define cosh __gen_ocl_internal_cosh
#define acos __gen_ocl_internal_acos
#define acospi __gen_ocl_internal_acospi
#define acosh __gen_ocl_internal_acosh
#define sin native_sin
#define sinpi __gen_ocl_internal_sinpi
#define sinh __gen_ocl_internal_sinh
#define asin __gen_ocl_internal_asin
#define asinpi __gen_ocl_internal_asinpi
#define asinh __gen_ocl_internal_asinh
#define tan native_tan
#define tanpi __gen_ocl_internal_tanpi
#define tanh __gen_ocl_internal_tanh
#define atan __gen_ocl_internal_atan
#define atan2 __gen_ocl_internal_atan2
#define atan2pi __gen_ocl_internal_atan2pi
#define atanpi __gen_ocl_internal_atanpi
#define atanh __gen_ocl_internal_atanh
#define pow powr
#define cbrt __gen_ocl_internal_cbrt
#define rint __gen_ocl_internal_rint
#define copysign __gen_ocl_internal_copysign
#define erf __gen_ocl_internal_erf
#define erfc __gen_ocl_internal_erfc

INLINE_OVERLOADABLE float mad(float a, float b, float c) {
  return a*b+c;
}

/* Scalar select: yields src1 when cond is non-zero, src0 otherwise. */
#define DEF(TYPE1, TYPE2) \
INLINE_OVERLOADABLE TYPE1 select(TYPE1 src0, TYPE1 src1, TYPE2 cond) { \
  return cond ? src1 : src0; \
}
DEF(char, char)
DEF(char, uchar)
DEF(uchar, char)
DEF(uchar, uchar)
DEF(short, short)
DEF(short, ushort)
DEF(ushort, short)
DEF(ushort, ushort)
DEF(int, int)
DEF(int, uint)
DEF(uint, int)
DEF(uint, uint)
DEF(long, long)
DEF(long, ulong)
DEF(ulong, long)
DEF(ulong, ulong)
DEF(float, int)
DEF(float, uint)
#undef DEF

/////////////////////////////////////////////////////////////////////////////
// Common Functions (see 6.11.4 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////
/* Float literals are used so no double-precision constant is involved. */
INLINE_OVERLOADABLE float step(float edge, float x) {
  return x < edge ? 0.f : 1.f;
}

/* min / max / clamp for every scalar type; clamp(v, l, u) = max(min(v, u), l). */
#define DECL_MIN_MAX_CLAMP(TYPE) \
INLINE_OVERLOADABLE TYPE max(TYPE a, TYPE b) { \
  return a > b ? a : b; \
} \
INLINE_OVERLOADABLE TYPE min(TYPE a, TYPE b) { \
  return a < b ? a : b; \
} \
INLINE_OVERLOADABLE TYPE clamp(TYPE v, TYPE l, TYPE u) { \
  return max(min(v, u), l); \
}
DECL_MIN_MAX_CLAMP(float)
DECL_MIN_MAX_CLAMP(int)
DECL_MIN_MAX_CLAMP(short)
DECL_MIN_MAX_CLAMP(char)
DECL_MIN_MAX_CLAMP(uint)
DECL_MIN_MAX_CLAMP(unsigned short)
DECL_MIN_MAX_CLAMP(unsigned char)
DECL_MIN_MAX_CLAMP(long)
DECL_MIN_MAX_CLAMP(ulong)
#undef DECL_MIN_MAX_CLAMP

/* frexp: splits x into a mantissa (returned) and a power-of-two exponent
 * (stored through exp). Zero returns x with *exp = 0; an all-ones exponent
 * field (inf/NaN) returns x and leaves *exp untouched. Otherwise the exponent
 * field is replaced by 126 (0x3F000000) and *exp = e - 126. */
#define BODY \
  uint u = as_uint(x); \
  if ((u & 0x7FFFFFFFu) == 0) { \
    *exp = 0; \
    return x; \
  } \
  int e = (u >> 23) & 255; \
  if (e == 255) \
    return x; \
  *exp = e - 126; \
  u = (u & (0x807FFFFFu)) | 0x3F000000; \
  return as_float(u);
INLINE_OVERLOADABLE float frexp(float x, global int *exp) { BODY; }
INLINE_OVERLOADABLE float frexp(float x, local int *exp) { BODY; }
INLINE_OVERLOADABLE float frexp(float x, private int *exp) { BODY; }
#undef BODY

/* nextafter: steps the bit pattern of x by one unit toward y. */
INLINE_OVERLOADABLE float nextafter(float x, float y) {
  uint hx = as_uint(x), ix = hx & 0x7FFFFFFF;
  uint hy = as_uint(y), iy = hy & 0x7FFFFFFF;
  if (ix > 0x7F800000 || iy > 0x7F800000)
    return nan(0u);
  if (hx == hy)
    return x;
  if (ix == 0)
    return as_float((hy & 0x80000000u) | 1);   /* smallest value with y's sign */
  if (((0 == (hx & 0x80000000u)) && y > x) || ((hx & 0x80000000u) && y < x))
    hx ++;   /* moving away from zero */
  else
    hx --;   /* moving toward zero */
  return as_float(hx);
}

/* modf: stores the integral part through i and returns the fractional part.
 * NaN yields NaN for both; infinity yields itself and a signed zero. */
#define BODY \
  uint hx = as_uint(x), ix = hx & 0x7FFFFFFF; \
  if (ix > 0x7F800000) { \
    *i = nan(0u); \
    return nan(0u); \
  } \
  if (ix == 0x7F800000) { \
    *i = x; \
    return as_float(hx & 0x80000000u); \
  } \
  *i = __gen_ocl_rndz(x); \
  return x - *i;
INLINE_OVERLOADABLE float modf(float x, global float *i) { BODY; }
INLINE_OVERLOADABLE float modf(float x, local float *i) { BODY; }
INLINE_OVERLOADABLE float modf(float x, private float *i) { BODY; }
#undef BODY

INLINE_OVERLOADABLE float degrees(float radians) { return (180 / M_PI_F) * radians; }
INLINE_OVERLOADABLE float radians(float degrees) { return (M_PI_F / 180) * degrees; }

/* Hermite interpolation between e0 and e1: t*t*(3 - 2t) with t clamped to [0,1]. */
INLINE_OVERLOADABLE float smoothstep(float e0, float e1, float x) {
  x = clamp((x - e0) / (e1 - e0), 0.f, 1.f);
  return x * x * (3 - 2 * x);
}
/* sign(): 1 for x > 0, -1 for x < 0, the (signed) zero itself for x == +-0,
 * and +0 for NaN. Returning x for zero keeps +0 and -0 distinct, which a
 * comparison against -0.f cannot do because -0.f == +0.f. */
INLINE_OVERLOADABLE float sign(float x) {
  if(x > 0)
    return 1;
  if(x < 0)
    return -1;
  if(x == 0.f)
    return x;
  return 0.f;
}

/* fmax / fmin: when exactly one argument is NaN the other one is returned.
 * max()/min() already yield b when a is NaN (the comparison is false), so only
 * a NaN in b needs an explicit test. */
INLINE_OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) {
  if (b != b)
    return a;
  return max(a,b);
}
INLINE_OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) {
  if (b != b)
    return a;
  return min(a,b);
}

/* maxmag / minmag: pick the argument with the larger / smaller magnitude,
 * falling back to max / min when the magnitudes are equal. */
INLINE_OVERLOADABLE float __gen_ocl_internal_maxmag(float x, float y) {
  float a = __gen_ocl_fabs(x), b = __gen_ocl_fabs(y);
  return a > b ? x : b > a ? y : max(x, y);
}
INLINE_OVERLOADABLE float __gen_ocl_internal_minmag(float x, float y) {
  float a = __gen_ocl_fabs(x), b = __gen_ocl_fabs(y);
  return a < b ? x : b < a ? y : min(x, y);
}

/* Linear blend: x + (y - x) * a. */
INLINE_OVERLOADABLE float mix(float x, float y, float a) { return x + (y-x)*a;}

/* fdim(): x - y when x > y, +0 when x <= y, NaN when either input is NaN.
 * The explicit x <= y branch avoids computing inf - inf for equal infinities. */
INLINE_OVERLOADABLE float __gen_ocl_internal_fdim(float x, float y) {
  if (x > y)
    return x - y;
  if (x <= y)
    return 0.f;
  return x + y;   /* at least one NaN: propagate it */
}

/* fract(): stores floor(x) through p and returns x - floor(x), capped at the
 * largest float below 1 (0x1.FFFFFep-1F). */
#define BODY \
  *p = __gen_ocl_internal_floor(x); \
  return __gen_ocl_internal_fmin(x - *p, 0x1.FFFFFep-1F);
INLINE_OVERLOADABLE float fract(float x, global float *p) { BODY; }
INLINE_OVERLOADABLE float fract(float x, local float *p) { BODY; }
INLINE_OVERLOADABLE float fract(float x, private float *p) { BODY; }
#undef BODY

/* remquo(): returns x - q*y with q = x/y rounded to nearest-even, and stores
 * the low 7 bits of q (sign-extended when q is negative) through quo.
 * NaN is returned for NaN inputs, infinite x, or zero y. A zero remainder
 * takes the sign of x. */
#define BODY \
  uint hx = as_uint(x), ix = hx & 0x7FFFFFFF, hy = as_uint(y), iy = hy & 0x7FFFFFFF; \
  if (ix > 0x7F800000 || iy > 0x7F800000 || ix == 0x7F800000 || iy == 0) \
    return nan(0u); \
  float k = x / y; \
  int q =  __gen_ocl_rnde(k); \
  *quo = q >= 0 ? (q & 127) : (q | 0xFFFFFF80u); \
  float r = x - q * y; \
  uint hr = as_uint(r), ir = hr & 0x7FFFFFFF; \
  if (ir == 0) \
    hr = ir | (hx & 0x80000000u); \
  return as_float(hr);
INLINE_OVERLOADABLE float remquo(float x, float y, global int *quo) { BODY; }
INLINE_OVERLOADABLE float remquo(float x, float y, local int *quo) { BODY; }
INLINE_OVERLOADABLE float remquo(float x, float y, private int *quo) { BODY; }
#undef BODY

INLINE_OVERLOADABLE float native_divide(float x, float y) { return x/y; }
INLINE_OVERLOADABLE float ldexp(float x, int n) {
  return __gen_ocl_pow(2, n) * x;
}

/* pown(): x raised to an integer power. powr() is only specified for a
 * non-negative base, so a negative base is handled on |x| and the sign is
 * restored for odd exponents. */
INLINE_OVERLOADABLE float pown(float x, int n) {
  if (x == 0 && n == 0)
    return 1;
  if (x < 0) {
    float r = powr(-x, n);
    return (n & 1) ? -r : r;
  }
  return powr(x, n);
}

/* rootn(): n-th root of x. A negative x has a real root only for odd n (the
 * negated root of |x|); for even n the result is NaN. */
INLINE_OVERLOADABLE float rootn(float x, int n) {
  if (x < 0)
    return (n & 1) ? -powr(-x, 1.f / n) : nan(0u);
  return powr(x, 1.f / n);
}

/////////////////////////////////////////////////////////////////////////////
// Geometric functions (see 6.11.5 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////
/* Dot products as nested multiply-adds over all components. */
INLINE_OVERLOADABLE float dot(float2 p0, float2 p1) {
  return mad(p0.x,p1.x,p0.y*p1.y);
}
INLINE_OVERLOADABLE float dot(float3 p0, float3 p1) {
  return mad(p0.x,p1.x,mad(p0.z,p1.z,p0.y*p1.y));
}
INLINE_OVERLOADABLE float dot(float4 p0, float4 p1) {
  return mad(p0.x,p1.x,mad(p0.w,p1.w,mad(p0.z,p1.z,p0.y*p1.y)));
}
INLINE_OVERLOADABLE float dot(float8 p0, float8 p1) {
  return mad(p0.x,p1.x,mad(p0.s7,p1.s7, mad(p0.s6,p1.s6,mad(p0.s5,p1.s5,
         mad(p0.s4,p1.s4,mad(p0.w,p1.w, mad(p0.z,p1.z,p0.y*p1.y)))))));
}
INLINE_OVERLOADABLE float dot(float16 p0, float16 p1) {
  return mad(p0.sc,p1.sc,mad(p0.sd,p1.sd,mad(p0.se,p1.se,mad(p0.sf,p1.sf,
         mad(p0.s8,p1.s8,mad(p0.s9,p1.s9,mad(p0.sa,p1.sa,mad(p0.sb,p1.sb,
         mad(p0.x,p1.x,mad(p0.s7,p1.s7, mad(p0.s6,p1.s6,mad(p0.s5,p1.s5,
         mad(p0.s4,p1.s4,mad(p0.w,p1.w, mad(p0.z,p1.z,p0.y*p1.y)))))))))))))));
}

/* Euclidean length: |x| for a scalar, sqrt(dot(x, x)) for vectors. */
INLINE_OVERLOADABLE float length(float x) { return __gen_ocl_fabs(x); }
INLINE_OVERLOADABLE float length(float2 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float length(float3 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float length(float4 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float length(float8 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float length(float16 x) { return sqrt(dot(x,x)); }

INLINE_OVERLOADABLE float distance(float x, float y) { return length(x-y); }
INLINE_OVERLOADABLE float distance(float2 x, float2 y) { return length(x-y); }
INLINE_OVERLOADABLE float distance(float3 x, float3 y) { return length(x-y); }
INLINE_OVERLOADABLE float distance(float4 x, float4 y) { return length(x-y); }
INLINE_OVERLOADABLE float distance(float8 x, float8 y) { return length(x-y); }
INLINE_OVERLOADABLE float distance(float16 x, float16 y) { return length(x-y); }

/* normalize: same direction, unit length. For a scalar the direction is its
 * sign, so negative inputs map to -1; zero and NaN are returned unchanged. */
INLINE_OVERLOADABLE float normalize(float x) {
  return x > 0 ? 1.f : (x < 0 ? -1.f : x);
}
INLINE_OVERLOADABLE float2 normalize(float2 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float3 normalize(float3 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float4 normalize(float4 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float8 normalize(float8 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float16 normalize(float16 x) { return x * rsqrt(dot(x, x)); }

/* The fast_* variants share the implementation of the regular ones. */
INLINE_OVERLOADABLE float fast_length(float x) { return __gen_ocl_fabs(x); }
INLINE_OVERLOADABLE float fast_length(float2 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float fast_length(float3 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float fast_length(float4 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float fast_length(float8 x) { return sqrt(dot(x,x)); }
INLINE_OVERLOADABLE float fast_length(float16 x) { return sqrt(dot(x,x)); }

INLINE_OVERLOADABLE float fast_distance(float x, float y) { return length(x-y); }
INLINE_OVERLOADABLE float fast_distance(float2 x, float2 y) { return length(x-y); }
INLINE_OVERLOADABLE float fast_distance(float3 x, float3 y) { return length(x-y); }
INLINE_OVERLOADABLE float fast_distance(float4 x, float4 y) { return length(x-y); }
INLINE_OVERLOADABLE float fast_distance(float8 x, float8 y) { return length(x-y); }
INLINE_OVERLOADABLE float fast_distance(float16 x, float16 y) { return length(x-y); }

INLINE_OVERLOADABLE float fast_normalize(float x) {
  return x > 0 ? 1.f : (x < 0 ? -1.f : x);
}
INLINE_OVERLOADABLE float2 fast_normalize(float2 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float3 fast_normalize(float3 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float4 fast_normalize(float4 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float8 fast_normalize(float8 x) { return x * rsqrt(dot(x, x)); }
INLINE_OVERLOADABLE float16 fast_normalize(float16 x) { return x * rsqrt(dot(x, x)); }

/* Cross product of the xyz components; the float4 form sets w to 0. */
INLINE_OVERLOADABLE float3 cross(float3 v0, float3 v1) {
  return v0.yzx*v1.zxy-v0.zxy*v1.yzx;
}
INLINE_OVERLOADABLE float4 cross(float4 v0, float4 v1) {
  return (float4)(v0.yzx*v1.zxy-v0.zxy*v1.yzx, 0.f);
}

/////////////////////////////////////////////////////////////////////////////
// Vector loads and stores
/////////////////////////////////////////////////////////////////////////////

// These loads and stores will use untyped reads and writes, so we can just
// cast to vector loads / stores. Not C99 compliant BTW due to aliasing issue.
// Well we do not care, we do not activate TBAA in the compiler
/* vloadN / vstoreN for N in {2, 4, 8, 16}: the scalar pointer advanced by
 * N * offset elements is reinterpreted as a pointer to an N-vector. */
#define DECL_UNTYPED_RW_SPACE_N(TYPE, DIM, SPACE) \
INLINE_OVERLOADABLE TYPE##DIM vload##DIM(size_t offset, const SPACE TYPE *p) { \
  return *(SPACE TYPE##DIM *) (p + DIM * offset); \
} \
INLINE_OVERLOADABLE void vstore##DIM(TYPE##DIM v, size_t offset, SPACE TYPE *p) { \
  *(SPACE TYPE##DIM *) (p + DIM * offset) = v; \
}

/* vload3 / vstore3 work element by element. A 3-component vector occupies the
 * storage of a 4-component one, so dereferencing a TYPE##3 pointer at a
 * 3-element stride would be misaligned and touch a fourth element; both
 * directions therefore access exactly the three scalars at p + 3 * offset. */
#define DECL_UNTYPED_V3_SPACE(TYPE, SPACE) \
INLINE_OVERLOADABLE void vstore3(TYPE##3 v, size_t offset, SPACE TYPE *p) {\
  *(p + 3 * offset) = v.s0; \
  *(p + 3 * offset + 1) = v.s1; \
  *(p + 3 * offset + 2) = v.s2; \
} \
INLINE_OVERLOADABLE TYPE##3 vload3(size_t offset, const SPACE TYPE *p) { \
  return (TYPE##3)(*(p + 3 * offset), \
                   *(p + 3 * offset + 1), \
                   *(p + 3 * offset + 2)); \
}

#define DECL_UNTYPED_RW_ALL_SPACE(TYPE, SPACE) \
  DECL_UNTYPED_RW_SPACE_N(TYPE, 2, SPACE) \
  DECL_UNTYPED_V3_SPACE(TYPE, SPACE) \
  DECL_UNTYPED_RW_SPACE_N(TYPE, 4, SPACE) \
  DECL_UNTYPED_RW_SPACE_N(TYPE, 8, SPACE) \
  DECL_UNTYPED_RW_SPACE_N(TYPE, 16, SPACE)

#define DECL_UNTYPED_RW_ALL(TYPE) \
  DECL_UNTYPED_RW_ALL_SPACE(TYPE, __global) \
  DECL_UNTYPED_RW_ALL_SPACE(TYPE, __local) \
  DECL_UNTYPED_RW_ALL_SPACE(TYPE, __constant) \
  DECL_UNTYPED_RW_ALL_SPACE(TYPE, __private)

DECL_UNTYPED_RW_ALL(char)
DECL_UNTYPED_RW_ALL(uchar)
DECL_UNTYPED_RW_ALL(short)
DECL_UNTYPED_RW_ALL(ushort)
DECL_UNTYPED_RW_ALL(int)
DECL_UNTYPED_RW_ALL(uint)
DECL_UNTYPED_RW_ALL(long)
DECL_UNTYPED_RW_ALL(ulong)
DECL_UNTYPED_RW_ALL(float)
DECL_UNTYPED_RW_ALL(double)

#undef DECL_UNTYPED_RW_ALL
#undef DECL_UNTYPED_RW_ALL_SPACE
#undef DECL_UNTYPED_RW_SPACE_N

// XXX workaround ptx profile
#define fabs __gen_ocl_internal_fabs
#define trunc __gen_ocl_internal_trunc
#define round __gen_ocl_internal_round
#define floor __gen_ocl_internal_floor
#define ceil __gen_ocl_internal_ceil
#define log __gen_ocl_internal_log
#define log2 __gen_ocl_internal_log2
#define log10 __gen_ocl_internal_log10
#define exp __gen_ocl_internal_exp
#define exp2 native_exp2
#define exp10 native_exp10
#define expm1 __gen_ocl_internal_expm1
#define fmin __gen_ocl_internal_fmin
#define fmax __gen_ocl_internal_fmax
#define fma mad
#define fdim __gen_ocl_internal_fdim
#define maxmag __gen_ocl_internal_maxmag
#define minmag __gen_ocl_internal_minmag

/////////////////////////////////////////////////////////////////////////////
// Miscellaneous Vector Functions (see 6.11.12 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////
/* shuffle(x, mask): each output component i is the element of x selected by
 * mask.si, with the index masked by (vec_step(x) - 1). DECn builds the
 * n-component result for one source vector type XTYPE. */
#define DEC2(TYPE, XTYPE) \
  INLINE_OVERLOADABLE TYPE##2 shuffle(XTYPE x, uint2 mask) { \
    TYPE##2 y; \
    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
    return y; \
  }

#define DEC4(TYPE, XTYPE) \
  INLINE_OVERLOADABLE TYPE##4 shuffle(XTYPE x, uint4 mask) { \
    TYPE##4 y; \
    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
    return y; \
  }

#define DEC8(TYPE, XTYPE) \
  INLINE_OVERLOADABLE TYPE##8 shuffle(XTYPE x, uint8 mask) { \
    TYPE##8 y; \
    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
    y.s4 = ((TYPE *) &x)[mask.s4 & (vec_step(x) - 1)]; \
    y.s5 = ((TYPE *) &x)[mask.s5 & (vec_step(x) - 1)]; \
    y.s6 = ((TYPE *) &x)[mask.s6 & (vec_step(x) - 1)]; \
    y.s7 = ((TYPE *) &x)[mask.s7 & (vec_step(x) - 1)]; \
    return y; \
  }

#define DEC16(TYPE, XTYPE) \
  INLINE_OVERLOADABLE TYPE##16 shuffle(XTYPE x, uint16 mask) { \
    TYPE##16 y; \
    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
    y.s4 = ((TYPE *) &x)[mask.s4 & (vec_step(x) - 1)]; \
    y.s5 = ((TYPE *) &x)[mask.s5 & (vec_step(x) - 1)]; \
    y.s6 = ((TYPE *) &x)[mask.s6 & (vec_step(x) - 1)]; \
    y.s7 = ((TYPE *) &x)[mask.s7 & (vec_step(x) - 1)]; \
    y.s8 = ((TYPE *) &x)[mask.s8 & (vec_step(x) - 1)]; \
    y.s9 = ((TYPE *) &x)[mask.s9 & (vec_step(x) - 1)]; \
    y.sa = ((TYPE *) &x)[mask.sa & (vec_step(x) - 1)]; \
    y.sb = ((TYPE *) &x)[mask.sb & (vec_step(x) - 1)]; \
    y.sc = ((TYPE *) &x)[mask.sc & (vec_step(x) - 1)]; \
    y.sd = ((TYPE *) &x)[mask.sd & (vec_step(x) - 1)]; \
    y.se = ((TYPE *) &x)[mask.se & (vec_step(x) - 1)]; \
    y.sf = ((TYPE *) &x)[mask.sf & (vec_step(x) - 1)]; \
    return y; \
  }

/* Every result width combined with every source width, per element type. */
#define DEF(TYPE) \
  DEC2(TYPE, TYPE##2); DEC2(TYPE, TYPE##4); DEC2(TYPE, TYPE##8); DEC2(TYPE, TYPE##16) \
  DEC4(TYPE, TYPE##2); DEC4(TYPE, TYPE##4); DEC4(TYPE, TYPE##8); DEC4(TYPE, TYPE##16) \
  DEC8(TYPE, TYPE##2); DEC8(TYPE, TYPE##4); DEC8(TYPE, TYPE##8); DEC8(TYPE, TYPE##16) \
  DEC16(TYPE, TYPE##2); DEC16(TYPE, TYPE##4); DEC16(TYPE, TYPE##8); DEC16(TYPE, TYPE##16)
DEF(char)
DEF(uchar)
DEF(short)
DEF(ushort)
DEF(int)
DEF(uint)
DEF(float)
DEF(long)
DEF(ulong)
#undef DEF
#undef DEC2
#undef DEC4
#undef DEC8
#undef DEC16

/* shuffle2(x, y, mask): selects from the concatenation of x and y. For source
 * widths up to 8 the two inputs are packed into one wider TEMPTYPE vector and
 * handed to shuffle(). Two 16-wide inputs cannot be packed, so the DECnX
 * variants pick from x for indices below 16 and from y (index & 15) otherwise. */
#define DEC2(TYPE, ARGTYPE, TEMPTYPE) \
  INLINE_OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, uint2 mask) { \
    return shuffle((TEMPTYPE)(x, y), mask); \
  }

#define DEC2X(TYPE) \
  INLINE_OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, uint2 mask) { \
    TYPE##2 z; \
    z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
    z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
    return z; \
  }

#define DEC4(TYPE, ARGTYPE, TEMPTYPE) \
  INLINE_OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, uint4 mask) { \
    return shuffle((TEMPTYPE)(x, y), mask); \
  }

#define DEC4X(TYPE) \
  INLINE_OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, uint4 mask) { \
    TYPE##4 z; \
    z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
    z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
    z.s2 = mask.s2 < 16 ? ((TYPE *)&x)[mask.s2] : ((TYPE *)&y)[mask.s2 & 15]; \
    z.s3 = mask.s3 < 16 ? ((TYPE *)&x)[mask.s3] : ((TYPE *)&y)[mask.s3 & 15]; \
    return z; \
  }

#define DEC8(TYPE, ARGTYPE, TEMPTYPE) \
  INLINE_OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, uint8 mask) { \
    return shuffle((TEMPTYPE)(x, y), mask); \
  }

#define DEC8X(TYPE) \
  INLINE_OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, uint8 mask) { \
    TYPE##8 z; \
    z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
    z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
    z.s2 = mask.s2 < 16 ? ((TYPE *)&x)[mask.s2] : ((TYPE *)&y)[mask.s2 & 15]; \
    z.s3 = mask.s3 < 16 ? ((TYPE *)&x)[mask.s3] : ((TYPE *)&y)[mask.s3 & 15]; \
    z.s4 = mask.s4 < 16 ? ((TYPE *)&x)[mask.s4] : ((TYPE *)&y)[mask.s4 & 15]; \
    z.s5 = mask.s5 < 16 ? ((TYPE *)&x)[mask.s5] : ((TYPE *)&y)[mask.s5 & 15]; \
    z.s6 = mask.s6 < 16 ? ((TYPE *)&x)[mask.s6] : ((TYPE *)&y)[mask.s6 & 15]; \
    z.s7 = mask.s7 < 16 ? ((TYPE *)&x)[mask.s7] : ((TYPE *)&y)[mask.s7 & 15]; \
    return z; \
  }

#define DEC16(TYPE, ARGTYPE, TEMPTYPE) \
  INLINE_OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, uint16 mask) { \
    return shuffle((TEMPTYPE)(x, y), mask); \
  }

#define DEC16X(TYPE) \
  INLINE_OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, uint16 mask) { \
    TYPE##16 z; \
    z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
    z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
    z.s2 = mask.s2 < 16 ? ((TYPE *)&x)[mask.s2] : ((TYPE *)&y)[mask.s2 & 15]; \
    z.s3 = mask.s3 < 16 ? ((TYPE *)&x)[mask.s3] : ((TYPE *)&y)[mask.s3 & 15]; \
    z.s4 = mask.s4 < 16 ? ((TYPE *)&x)[mask.s4] : ((TYPE *)&y)[mask.s4 & 15]; \
    z.s5 = mask.s5 < 16 ? ((TYPE *)&x)[mask.s5] : ((TYPE *)&y)[mask.s5 & 15]; \
    z.s6 = mask.s6 < 16 ? ((TYPE *)&x)[mask.s6] : ((TYPE *)&y)[mask.s6 & 15]; \
    z.s7 = mask.s7 < 16 ? ((TYPE *)&x)[mask.s7] : ((TYPE *)&y)[mask.s7 & 15]; \
    z.s8 = mask.s8 < 16 ? ((TYPE *)&x)[mask.s8] : ((TYPE *)&y)[mask.s8 & 15]; \
    z.s9 = mask.s9 < 16 ? ((TYPE *)&x)[mask.s9] : ((TYPE *)&y)[mask.s9 & 15]; \
    z.sa = mask.sa < 16 ? ((TYPE *)&x)[mask.sa] : ((TYPE *)&y)[mask.sa & 15]; \
    z.sb = mask.sb < 16 ? ((TYPE *)&x)[mask.sb] : ((TYPE *)&y)[mask.sb & 15]; \
    z.sc = mask.sc < 16 ? ((TYPE *)&x)[mask.sc] : ((TYPE *)&y)[mask.sc & 15]; \
    z.sd = mask.sd < 16 ? ((TYPE *)&x)[mask.sd] : ((TYPE *)&y)[mask.sd & 15]; \
    z.se = mask.se < 16 ? ((TYPE *)&x)[mask.se] : ((TYPE *)&y)[mask.se & 15]; \
    z.sf = mask.sf < 16 ? ((TYPE *)&x)[mask.sf] : ((TYPE *)&y)[mask.sf & 15]; \
    return z; \
  }

#define DEF(TYPE) \
  DEC2(TYPE, TYPE##2, TYPE##4) \
  DEC2(TYPE, TYPE##4, TYPE##8) \
  DEC2(TYPE, TYPE##8, TYPE##16) \
  DEC2X(TYPE) \
  DEC4(TYPE, TYPE##2, TYPE##4) \
  DEC4(TYPE, TYPE##4, TYPE##8) \
  DEC4(TYPE, TYPE##8, TYPE##16) \
  DEC4X(TYPE) \
  DEC8(TYPE, TYPE##2, TYPE##4) \
  DEC8(TYPE, TYPE##4, TYPE##8) \
  DEC8(TYPE, TYPE##8, TYPE##16) \
  DEC8X(TYPE) \
  DEC16(TYPE, TYPE##2, TYPE##4) \
  DEC16(TYPE, TYPE##4, TYPE##8) \
  DEC16(TYPE, TYPE##8, TYPE##16) \
  DEC16X(TYPE)
DEF(char)
DEF(uchar)
DEF(short)
DEF(ushort)
DEF(int)
DEF(uint)
DEF(float)
DEF(long)
DEF(ulong)
#undef DEF
#undef DEC2
#undef DEC2X
#undef DEC4
#undef DEC4X
#undef DEC8
#undef DEC8X
#undef DEC16
#undef DEC16X

/////////////////////////////////////////////////////////////////////////////
// Synchronization functions
/////////////////////////////////////////////////////////////////////////////
#define CLK_LOCAL_MEM_FENCE  (1 << 0)
#define CLK_GLOBAL_MEM_FENCE (1 << 1)

void __gen_ocl_barrier_local(void);
void __gen_ocl_barrier_global(void);
void __gen_ocl_barrier_local_and_global(void);

typedef uint cl_mem_fence_flags;
/* Dispatches to the barrier builtin matching the fence flags. Any flag value
 * other than the three combinations tested below issues no barrier. */
INLINE void barrier(cl_mem_fence_flags flags) {
  if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))
    __gen_ocl_barrier_local_and_global();
  else if (flags == CLK_LOCAL_MEM_FENCE)
    __gen_ocl_barrier_local();
  else if (flags == CLK_GLOBAL_MEM_FENCE)
    __gen_ocl_barrier_global();
}

/* The fence functions have empty bodies in this header. */
INLINE void mem_fence(cl_mem_fence_flags flags) {
}
INLINE void read_mem_fence(cl_mem_fence_flags flags) {
}
INLINE
void write_mem_fence(cl_mem_fence_flags flags) { } ///////////////////////////////////////////////////////////////////////////// // Async Copies and prefetch ///////////////////////////////////////////////////////////////////////////// #define BODY(SRC_STRIDE, DST_STRIDE) \ uint size = get_local_size(2) * get_local_size(1) * get_local_size(0); \ uint count = num / size; \ uint offset = get_local_id(2) * get_local_size(1) + get_local_id(1); \ offset = offset * get_local_size(0) + get_local_id(0); \ for(uint i=0; i= __gen_ocl_get_image_width(surface) \ || coord.s1 >= __gen_ocl_get_image_height(surface))) \ || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0))) #define FIXUP_FLOAT_COORD(tmpCoord) \ { \ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ tmpCoord.s0 += -0x1p-9; \ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \ tmpCoord.s1 += -0x1p-9f; \ } #define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) \ DECL_READ_IMAGE(0, int_clamping_fix, image_type, type, suffix, int ##n) \ DECL_READ_IMAGE(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n) \ DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n) \ DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \ DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2) DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2) DECL_IMAGE(0, image2d_t, float4, f, 2) #undef EXPEND_READ_COORD #undef EXPEND_WRITE_COORD #undef OUT_OF_BOX #undef FIXUP_FLOAT_COORD #define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2 #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color #define OUT_OF_BOX(coord, surface, normalized) \ (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 || \ ((normalized == 0) \ && (coord.s0 >= __gen_ocl_get_image_width(surface) \ || coord.s1 >= __gen_ocl_get_image_height(surface) \ || coord.s2 >= __gen_ocl_get_image_depth(surface))) \ || ((normalized != 0) \ &&(coord.s0 > 1 || 
coord.s1 > 1 || coord.s2 > 1))) #define FIXUP_FLOAT_COORD(tmpCoord) \ { \ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \ tmpCoord.s0 += -0x1p-9; \ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \ tmpCoord.s1 += -0x1p-9; \ if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20) \ tmpCoord.s2 += -0x1p-9; \ } DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4) DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4) DECL_IMAGE(0, image3d_t, float4, f, 4) DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3) DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3) DECL_IMAGE(0, image3d_t, float4, f, 3) #undef EXPEND_READ_COORD #undef EXPEND_WRITE_COORD #undef OUT_OF_BOX #undef FIXUP_FLOAT_COORD #undef DECL_IMAGE #undef DECL_READ_IMAGE #undef DECL_READ_IMAGE_NOSAMPLER #undef DECL_WRITE_IMAGE #undef GEN_FIX_1 #define DECL_IMAGE_INFO(image_type) \ INLINE_OVERLOADABLE int get_image_width(image_type image) \ { \ GET_IMAGE(image, surface_id);\ return __gen_ocl_get_image_width(surface_id);\ } \ INLINE_OVERLOADABLE int get_image_height(image_type image)\ { \ GET_IMAGE(image, surface_id);\ return __gen_ocl_get_image_height(surface_id); \ } \ INLINE_OVERLOADABLE int get_image_channel_data_type(image_type image)\ { \ GET_IMAGE(image, surface_id);\ return __gen_ocl_get_image_channel_data_type(surface_id); \ }\ INLINE_OVERLOADABLE int get_image_channel_order(image_type image)\ { \ GET_IMAGE(image, surface_id);\ return __gen_ocl_get_image_channel_order(surface_id); \ } DECL_IMAGE_INFO(image2d_t) DECL_IMAGE_INFO(image3d_t) INLINE_OVERLOADABLE int get_image_depth(image3d_t image) { GET_IMAGE(image, surface_id); return __gen_ocl_get_image_depth(surface_id); } INLINE_OVERLOADABLE int2 get_image_dim(image2d_t image) { return (int2){get_image_width(image), get_image_height(image)}; } INLINE_OVERLOADABLE int4 get_image_dim(image3d_t image) { return (int4){get_image_width(image), get_image_height(image), get_image_depth(image), 0}; } #if 0 /* The following functions are not implemented yet. 
*/ DECL_IMAGE_INFO(image1d_t) DECL_IMAGE_INFO(image1d_buffer_t) DECL_IMAGE_INFO(image1d_array_t) DECL_IMAGE_INFO(image2d_array_t) INLINE_OVERLOADABLE int2 get_image_dim(image2d_array_t image) { return __gen_ocl_get_image_dim(image); } INLINE_OVERLOADABLE int4 get_image_dim(image2d_array_t image) { return __gen_ocl_get_image_dim(image); } INLINE_OVERLOADABLE size_t get_image_array_size(image2d_array_t image) { return __gen_ocl_get_image_array_size(image); } INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image) { return __gen_ocl_get_image_array_size(image); } #endif #pragma OPENCL EXTENSION cl_khr_fp64 : disable #undef DECL_IMAGE #undef DECL_READ_IMAGE #undef DECL_READ_IMAGE_NOSAMPLER #undef DECL_WRITE_IMAGE #undef GET_IMAGE // ##BEGIN_VECTOR## // ##END_VECTOR## #undef INLINE_OVERLOADABLE #undef PURE #undef CONST #undef OVERLOADABLE #undef INLINE #endif /* __GEN_OCL_STDLIB_H__ */ Release_v0.3/backend/src/sys/000077500000000000000000000000001223142177000161775ustar00rootroot00000000000000Release_v0.3/backend/src/sys/alloc.cpp000066400000000000000000000265451223142177000200110ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */

/**
 * \file alloc.cpp
 * \author Benjamin Segovia
 *
 * Provides facilities to track allocations and pre-initialize memory at
 * memory allocation and memory free time
 */
// NOTE(review): several #include directives below carry no target and the
// container members of MemDebugger carry no template arguments. This looks
// like extraction damage (angle-bracket text dropped) -- restore them from the
// pristine file before building.
#include "sys/alloc.hpp"
#include "sys/atomic.hpp"
#include "sys/mutex.hpp"

#if GBE_DEBUG_MEMORY
#include
#include
#endif /* GBE_DEBUG_MEMORY */

#if defined(__ICC__)
#include
#endif /* __ICC__ */
#include
#include
#include

////////////////////////////////////////////////////////////////////////////////
/// Memory debugger
////////////////////////////////////////////////////////////////////////////////

#if GBE_DEBUG_MEMORY
namespace gbe
{
  /*! Record kept for one live allocation. fileName and functionName are
   *  indices into MemDebugger::staticStringVector, line is the source line of
   *  the allocation site and alloc is the value of MemDebugger::allocNum at
   *  the time the allocation was registered
   */
  struct AllocData {
    INLINE AllocData(void) {}
    INLINE AllocData(int fileName_, int functionName_, int line_, intptr_t alloc_) :
      fileName(fileName_), functionName(functionName_), line(line_), alloc(alloc_) {}
    int fileName, functionName, line;
    intptr_t alloc;
  };

  /*! Tracks every registered pointer until it is removed. The destructor
   *  dumps whatever is still registered
   */
  struct MemDebugger {
    MemDebugger(void) : unfreedNum(0), allocNum(0) {}
    ~MemDebugger(void) { this->dumpAlloc(); }
    /*! Register ptr with its allocation site. Returns ptr */
    void* insertAlloc(void *ptr, const char *file, const char *function, int line);
    /*! Forget ptr. It must have been registered before */
    void removeAlloc(void *ptr);
    /*! Print every allocation that is still registered */
    void dumpAlloc(void);
    /*! Print one allocation record */
    void dumpData(const AllocData &data);
    /*! Count the still unfreed allocations */
    volatile intptr_t unfreedNum;
    /*! Total number of allocations done */
    volatile intptr_t allocNum;
    /*! Maps a file name or function name string to its index in
     *  staticStringVector
     */
    std::tr1::unordered_map staticStringMap;
    /*! Each element contains the actual string */
    std::vector staticStringVector;
    /*! Live allocations: pointer value (as uintptr_t) to its AllocData */
    std::map allocMap;
    /*! Protect the memory debugger accesses */
    MutexSys mutex;
  };

  /*! Register a new allocation. NULL is ignored and returned as is.
   *  Registering a pointer that is already tracked dumps the existing record
   *  and goes through FATAL. The whole update runs under the debugger mutex
   */
  void* MemDebugger::insertAlloc(void *ptr, const char *file, const char *function, int line)
  {
    if (ptr == NULL) return ptr;
    Lock lock(mutex);
    const uintptr_t iptr = (uintptr_t) ptr;
    if (UNLIKELY(allocMap.find(iptr) != allocMap.end())) {
      this->dumpData(allocMap.find(iptr)->second);
      FATAL("Pointer already in map");
    }
    // Both lookups are done before any insertion below
    const auto fileIt = staticStringMap.find(file);
    const auto functionIt = staticStringMap.find(function);
    int fileName, functionName;
    // Unknown strings are appended to the vector; their index is the new
    // last position
    if (fileIt == staticStringMap.end()) {
      staticStringVector.push_back(file);
      staticStringMap[file] = fileName = int(staticStringVector.size()) - 1;
    } else
      fileName = staticStringMap[file];
    if (functionIt == staticStringMap.end()) {
      staticStringVector.push_back(function);
      staticStringMap[function] = functionName = int(staticStringVector.size()) - 1;
    } else
      functionName = staticStringMap[function];
    // allocNum (before increment) is stored as the sequence number of this
    // allocation
    allocMap[iptr] = AllocData(fileName, functionName, line, allocNum);
    unfreedNum++;
    allocNum++;
    return ptr;
  }

  /*! Unregister an allocation. NULL is ignored. An unknown pointer goes
   *  through FATAL_IF
   */
  void MemDebugger::removeAlloc(void *ptr)
  {
    if (ptr == NULL) return;
    Lock lock(mutex);
    const uintptr_t iptr = (uintptr_t) ptr;
    FATAL_IF(allocMap.find(iptr) == allocMap.end(), "Pointer not referenced");
    allocMap.erase(iptr);
    unfreedNum--;
  }

  /*! Print one record on std::cerr: sequence number, file, function, line */
  void MemDebugger::dumpData(const AllocData &data) {
    std::cerr << "ALLOC " << data.alloc << ": " <<
                 "file " << staticStringVector[data.fileName] << ", " <<
                 "function " << staticStringVector[data.functionName] << ", " <<
                 "line " << data.line << std::endl;
  }

  /*! Print the unfreed counter, every live record and the number of interned
   *  strings on std::cerr. NOTE(review): the mutex is not taken here
   */
  void MemDebugger::dumpAlloc(void) {
    std::cerr << "MemDebugger: Unfreed number: " << unfreedNum << std::endl;
    for (const auto &alloc : allocMap) this->dumpData(alloc.second);
    std::cerr << "MemDebugger: "
              << staticStringVector.size()
              << " allocated static strings"
              << std::endl;
  }

  /*! The user can deactivate the memory initialization */
  static bool memoryInitializationEnabled = true;

  /*! Declare C like interface functions here */
  static MemDebugger *memDebugger = NULL;

  /*!
Monitor maximum memory requirement in the compiler */ static MutexSys *sizeMutex = NULL; static bool isMutexInitializing = true; static size_t memDebuggerCurrSize(0u); static size_t memDebuggerMaxSize(0u); static void SizeMutexDeallocate(void) { if (sizeMutex) delete sizeMutex; } static void SizeMutexAllocate(void) { if (sizeMutex == NULL && isMutexInitializing == false) { isMutexInitializing = true; sizeMutex = new MutexSys; atexit(SizeMutexDeallocate); } } /*! Stop the memory debugger */ static void MemDebuggerEnd(void) { MemDebugger *_debug = memDebugger; memDebugger = NULL; std::cout << "Maximum memory consumption: " << std::setprecision(2) << std::fixed << float(memDebuggerMaxSize) / 1024. << "KB" << std::endl; delete _debug; GBE_ASSERT(memDebuggerCurrSize == 0); } /*! Bring up the debugger at pre-main */ static struct ForceMemDebugger { ForceMemDebugger(void) { doesnotmatter = GBE_NEW(int); GBE_DELETE(doesnotmatter); } int *doesnotmatter; } forceMemDebugger; /*! Start the memory debugger */ static void MemDebuggerStart(void) { if (memDebugger == NULL) { atexit(MemDebuggerEnd); memDebugger = new MemDebugger; } } void* MemDebuggerInsertAlloc(void *ptr, const char *file, const char *function, int line) { if (memDebugger == NULL) MemDebuggerStart(); return memDebugger->insertAlloc(ptr, file, function, line); } void MemDebuggerRemoveAlloc(void *ptr) { if (memDebugger == NULL) MemDebuggerStart(); memDebugger->removeAlloc(ptr); } void MemDebuggerDumpAlloc(void) { if (memDebugger == NULL) MemDebuggerStart(); memDebugger->dumpAlloc(); } void MemDebuggerEnableMemoryInitialization(bool enabled) { memoryInitializationEnabled = enabled; } void MemDebuggerInitializeMem(void *mem, size_t sz) { if (memoryInitializationEnabled) std::memset(mem, 0xcd, sz); } } /* namespace gbe */ #endif /* GBE_DEBUG_MEMORY */ namespace gbe { #if GBE_DEBUG_MEMORY void* memAlloc(size_t size) { void *ptr = std::malloc(size + sizeof(size_t)); *(size_t *) ptr = size; 
MemDebuggerInitializeMem((char*) ptr + sizeof(size_t), size); SizeMutexAllocate(); if (sizeMutex) sizeMutex->lock(); memDebuggerCurrSize += size; memDebuggerMaxSize = std::max(memDebuggerCurrSize, memDebuggerMaxSize); if (sizeMutex) sizeMutex->unlock(); return (char *) ptr + sizeof(size_t); } void memFree(void *ptr) { if (ptr != NULL) { char *toFree = (char*) ptr - sizeof(size_t); const size_t size = *(size_t *) toFree; MemDebuggerInitializeMem(ptr, size); SizeMutexAllocate(); if (sizeMutex) sizeMutex->lock(); memDebuggerCurrSize -= size; if (sizeMutex) sizeMutex->unlock(); std::free(toFree); } } #else void* memAlloc(size_t size) { return std::malloc(size); } void memFree(void *ptr) { if (ptr != NULL) std::free(ptr); } #endif /* GBE_DEBUG_MEMORY */ } /* namespace gbe */ #if GBE_DEBUG_MEMORY namespace gbe { void* alignedMalloc(size_t size, size_t align) { void* mem = malloc(size+align+sizeof(uintptr_t) + sizeof(void*)); FATAL_IF (!mem && size, "memory allocation failed"); char* aligned = (char*) mem + sizeof(uintptr_t) + sizeof(void*); aligned += align - ((uintptr_t)aligned & (align - 1)); ((void**)aligned)[-1] = mem; ((uintptr_t*)aligned)[-2] = uintptr_t(size); MemDebuggerInitializeMem(aligned, size); SizeMutexAllocate(); if (sizeMutex) sizeMutex->lock(); memDebuggerCurrSize += size; memDebuggerMaxSize = std::max(memDebuggerCurrSize, memDebuggerMaxSize); if (sizeMutex) sizeMutex->unlock(); return aligned; } void alignedFree(void* ptr) { if (ptr) { const size_t size = ((uintptr_t*)ptr)[-2]; MemDebuggerInitializeMem(ptr, size); free(((void**)ptr)[-1]); SizeMutexAllocate(); if (sizeMutex) sizeMutex->lock(); memDebuggerCurrSize -= size; if (sizeMutex) sizeMutex->unlock(); } } } /* namespace gbe */ #else /* GBE_DEBUG_MEMORY */ //////////////////////////////////////////////////////////////////////////////// /// Linux Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__LINUX__) || defined(__GLIBC__) #include #include 
#include #include #include namespace gbe { void* alignedMalloc(size_t size, size_t align) { void* ptr = memalign(align,size); FATAL_IF (!ptr && size, "memory allocation failed"); MemDebuggerInitializeMem(ptr, size); return ptr; } void alignedFree(void *ptr) { if (ptr) std::free(ptr); } } /* namespace gbe */ #else #error "Unsupported platform" #endif /* __LINUX__ */ #endif //////////////////////////////////////////////////////////////////////////////// // Linear allocator //////////////////////////////////////////////////////////////////////////////// namespace gbe { LinearAllocator::Segment::Segment(size_t size) : size(size), offset(0u), data(alignedMalloc(size, CACHE_LINE)), next(NULL){} LinearAllocator::Segment::~Segment(void) { alignedFree(data); if (this->next) GBE_DELETE(this->next); } LinearAllocator::LinearAllocator(size_t minSize, size_t maxSize) : maxSize(std::max(maxSize, size_t(CACHE_LINE))) { this->curr = GBE_NEW(LinearAllocator::Segment, std::max(minSize, size_t(1))); } LinearAllocator::~LinearAllocator(void) { if (this->curr) GBE_DELETE(this->curr); } void *LinearAllocator::allocate(size_t size) { #if GBE_DEBUG_SPECIAL_ALLOCATOR if (ptr) GBE_ALIGNED_MALLOC(size, sizeof(void*)); #else // Try to use the current segment. This is the most likely condition here this->curr->offset = ALIGN(this->curr->offset, sizeof(void*)); if (this->curr->offset + size <= this->curr->size) { char *ptr = (char*) curr->data + this->curr->offset; this->curr->offset += size; return (void*) ptr; } // Well not really a use case in this code base if (UNLIKELY(size > maxSize)) { // This is really bad since we do two allocations Segment *unfortunate = GBE_NEW(Segment, size); GBE_ASSERT(this->curr); Segment *next = this->curr->next; this->curr->next = unfortunate; unfortunate->next = next; return unfortunate->data; } // OK. 
We need a new segment const size_t segmentSize = std::max(size, 2*this->curr->size); Segment *next = GBE_NEW(Segment, segmentSize); next->next = curr; this->curr = next; char *ptr = (char*) curr->data; this->curr->offset += size; return ptr; #endif } } /* namespace gbe */ Release_v0.3/backend/src/sys/alloc.hpp000066400000000000000000000254131223142177000200070ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file alloc.hpp * \author Benjamin Segovia */ #ifndef __GBE_ALLOC_HPP__ #define __GBE_ALLOC_HPP__ #include "sys/platform.hpp" #include "sys/assert.hpp" #include #include namespace gbe { /*! regular allocation */ void* memAlloc(size_t size); void memFree(void *ptr); /*! Aligned allocation */ void* alignedMalloc(size_t size, size_t align = 64); void alignedFree(void* ptr); /*! 
Monitor memory allocations */
#if GBE_DEBUG_MEMORY
  void* MemDebuggerInsertAlloc(void*, const char*, const char*, int);
  void MemDebuggerRemoveAlloc(void *ptr);
  void MemDebuggerDumpAlloc(void);
  void MemDebuggerInitializeMem(void *mem, size_t sz);
  void MemDebuggerEnableMemoryInitialization(bool enabled);
#else
  // Without GBE_DEBUG_MEMORY the hooks compile down to nothing
  INLINE void* MemDebuggerInsertAlloc(void *ptr, const char*, const char*, int) {return ptr;}
  INLINE void MemDebuggerRemoveAlloc(void *ptr) {}
  INLINE void MemDebuggerDumpAlloc(void) {}
  INLINE void MemDebuggerInitializeMem(void *mem, size_t sz) {}
  INLINE void MemDebuggerEnableMemoryInitialization(bool enabled) {}
#endif /* GBE_DEBUG_MEMORY */

  /*! Properly handle the allocated type: registers ptr and hands it back
   *  with its static type instead of void*.
   *  NOTE(review): the template parameter list of this function and of the
   *  class templates further down is missing, apparently dropped together
   *  with the #include targets -- restore from the pristine file
   */
  template
  T* _MemDebuggerInsertAlloc(T *ptr, const char *file, const char *function, int line) {
    MemDebuggerInsertAlloc(ptr, file, function, line);
    return ptr;
  }
} /* namespace gbe */

/*! Declare a class with custom allocators. Leaves the access level at
 *  private, so it is meant to be the last statement of a class body
 */
#define GBE_CLASS(TYPE) \
  GBE_STRUCT(TYPE) \
private:

/*! Declare a structure with custom allocators: new / new[] go through
 *  gbe::alignedMalloc with GBE_DEFAULT_ALIGNMENT, the placement forms return
 *  the given address and delete / delete[] go through gbe::alignedFree.
 *  Leaves the access level at public
 */
#define GBE_STRUCT(TYPE) \
public: \
  void* operator new(size_t size) { \
    return gbe::alignedMalloc(size, GBE_DEFAULT_ALIGNMENT); \
  } \
  void* operator new[](size_t size) { \
    return gbe::alignedMalloc(size, GBE_DEFAULT_ALIGNMENT); \
  } \
  void* operator new(size_t size, void *p) { return p; } \
  void* operator new[](size_t size, void *p) { return p; } \
  void operator delete(void* ptr) { return gbe::alignedFree(ptr); } \
  void operator delete[](void* ptr) { return gbe::alignedFree(ptr); }

/*! Macros to handle allocation position: every allocation macro registers
 *  the new pointer with the file, function and line of the call site, and
 *  every release macro unregisters the pointer before releasing it. The
 *  release macros are wrapped in do { } while (0) so that they behave as one
 *  statement
 */
#define GBE_NEW(T,...) \
  gbe::_MemDebuggerInsertAlloc(new T(__VA_ARGS__), __FILE__, __FUNCTION__, __LINE__)

#define GBE_NEW_NO_ARG(T) \
  gbe::_MemDebuggerInsertAlloc(new T, __FILE__, __FUNCTION__, __LINE__)

#define GBE_NEW_ARRAY(T,N,...) \
  gbe::_MemDebuggerInsertAlloc(new T[N](__VA_ARGS__), __FILE__, __FUNCTION__, __LINE__)

#define GBE_NEW_ARRAY_NO_ARG(T,N)\
  gbe::_MemDebuggerInsertAlloc(new T[N], __FILE__, __FUNCTION__, __LINE__)

#define GBE_NEW_P(T,X,...) \
  gbe::_MemDebuggerInsertAlloc(new (X) T(__VA_ARGS__), __FILE__, __FUNCTION__, __LINE__)

#define GBE_DELETE(X) \
  do { gbe::MemDebuggerRemoveAlloc(X); delete X; } while (0)

#define GBE_DELETE_ARRAY(X) \
  do { gbe::MemDebuggerRemoveAlloc(X); delete[] X; } while (0)

#define GBE_MALLOC(SZ) \
  gbe::MemDebuggerInsertAlloc(gbe::memAlloc(SZ),__FILE__, __FUNCTION__, __LINE__)

#define GBE_FREE(X) \
  do { gbe::MemDebuggerRemoveAlloc(X); gbe::memFree(X); } while (0)

#define GBE_ALIGNED_FREE(X) \
  do { gbe::MemDebuggerRemoveAlloc(X); gbe::alignedFree(X); } while (0)

#define GBE_ALIGNED_MALLOC(SZ,ALIGN) \
  gbe::MemDebuggerInsertAlloc(gbe::alignedMalloc(SZ,ALIGN),__FILE__, __FUNCTION__, __LINE__)

namespace gbe
{
  /*! STL compliant allocator to intercept all memory allocations. It holds
   *  no state: every instance compares equal to every other one
   */
  template
  class Allocator {
  public:
    typedef T value_type;
    typedef value_type* pointer;
    typedef const value_type* const_pointer;
    typedef value_type& reference;
    typedef const value_type& const_reference;
    typedef std::size_t size_type;
    typedef std::ptrdiff_t difference_type;
    typedef typename std::allocator::const_pointer void_allocator_ptr;
    template
    struct rebind { typedef Allocator other; };

    INLINE Allocator(void) {}
    INLINE ~Allocator(void) {}
    INLINE Allocator(Allocator const&) {}
    template
    INLINE Allocator(Allocator const&) {}
    INLINE pointer address(reference r) { return &r; }
    INLINE const_pointer address(const_reference r) { return &r; }
    /*! Storage for n objects. Types aligned more strictly than uintptr_t use
     *  the aligned allocation path; the hint argument is ignored
     */
    INLINE pointer allocate(size_type n, void_allocator_ptr = 0) {
      if (ALIGNOF(T) > sizeof(uintptr_t))
        return (pointer) GBE_ALIGNED_MALLOC(n*sizeof(T), ALIGNOF(T));
      else
        return (pointer) GBE_MALLOC(n * sizeof(T));
    }
    /*! Must take the same branch as allocate so that the matching release
     *  function is used
     */
    INLINE void deallocate(pointer p, size_type) {
      if (ALIGNOF(T) > sizeof(uintptr_t))
        GBE_ALIGNED_FREE(p);
      else
        GBE_FREE(p);
    }
    INLINE size_type max_size(void) const {
      return std::numeric_limits::max() / sizeof(T);
    }
    INLINE void construct(pointer p, const T& t = T()) { ::new(p) T(t); }
    INLINE void destroy(pointer p) { p->~T(); }
    // NOTE(review): both comparison operators are non-const members, so they
    // cannot be called on a const Allocator -- confirm no caller needs that
    INLINE bool operator==(Allocator const&) { return true; }
    INLINE bool operator!=(Allocator const& a) { return !operator==(a); }
  };

// Deactivate fast allocators
#ifndef GBE_DEBUG_SPECIAL_ALLOCATOR
#define GBE_DEBUG_SPECIAL_ALLOCATOR 0
#endif

  /*! A growing pool never gives memory to the system but chain free elements
   *  together such as deallocation can be quickly done. With
   *  GBE_DEBUG_SPECIAL_ALLOCATOR set, allocate / deallocate forward to the
   *  tracked aligned allocation macros instead
   */
  template
  class GrowingPool
  {
  public:
    GrowingPool(uint32_t elemNum = 1) :
      curr(GBE_NEW(GrowingPoolElem, elemNum <= 1 ? 1 : elemNum)),
      free(NULL), full(NULL), freeList(NULL) {}
    ~GrowingPool(void) {
      GBE_SAFE_DELETE(curr);
      GBE_SAFE_DELETE(free);
      GBE_SAFE_DELETE(full);
    }
    /*! Storage for one element. The element is not constructed here.
     *  NOTE(review): slots are stepped with T* arithmetic (sizeof(T) apart)
     *  while GrowingPoolElem sizes them as max(sizeof(T), sizeof(void*)) and
     *  deallocate() stores a void* in a released slot; for a T smaller than
     *  a pointer the link would overlap the next slot -- confirm no such T
     *  is pooled
     */
    void *allocate(void) {
#if GBE_DEBUG_SPECIAL_ALLOCATOR
      return GBE_ALIGNED_MALLOC(sizeof(T), ALIGNOF(T));
#else
      // Pick up an element from the free list
      if (this->freeList != NULL) {
        void *data = (void*) freeList;
        this->freeList = *(void**) freeList;
        return data;
      }

      // Pick up an element from the current block (if not full)
      if (this->curr->allocated < this->curr->maxElemNum) {
        void *data = (T*) curr->data + curr->allocated++;
        return data;
      }

      // Block is full
      this->curr->next = this->full;
      this->full = this->curr;

      // Try to pick up a free block
      if (this->free) this->getFreeBlock();

      // No free block we must allocate a new one
      else
        this->curr = GBE_NEW(GrowingPoolElem, 2 * this->curr->maxElemNum);

      void *data = (T*) curr->data + curr->allocated++;
      return data;
#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
    }
    /*! Give one element back. The released slot itself stores the link to
     *  the previous head of the free list. NULL is ignored
     */
    void deallocate(void *t) {
      if (t == NULL) return;
#if GBE_DEBUG_SPECIAL_ALLOCATOR
      GBE_ALIGNED_FREE(t);
#else
      *(void**) t = this->freeList;
      this->freeList = t;
#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
    }
    /*! Forget every element handed out so far while keeping the blocks for
     *  reuse
     */
    void rewind(void) {
#if GBE_DEBUG_SPECIAL_ALLOCATOR == 0
      // All free elements return to their blocks
      this->freeList = NULL;

      // Put back current block in full list
      if (this->curr) {
        this->curr->next = this->full;
        this->full = this->curr;
        this->curr = NULL;
      }

      // Reverse the chain list and mark all blocks as empty
      while (this->full) {
        GrowingPoolElem *next = this->full->next;
        this->full->allocated = 0;
        this->full->next = this->free;
        this->free = this->full;
        this->full = next;
      }

      // Provide a valid current block
      this->getFreeBlock();
#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
    }
  private:
    /*! Pick-up a free block: it becomes the current block and is unlinked
     *  from the free chain
     */
    INLINE void getFreeBlock(void) {
      GBE_ASSERT(this->free);
      this->curr = this->free;
      this->free = this->free->next;
      this->curr->next = NULL;
    }
    /*! Chunk of elements to allocate. Destroying a chunk destroys the chain
     *  behind it
     */
    class GrowingPoolElem
    {
      friend class GrowingPool;
      GrowingPoolElem(size_t elemNum) {
        const size_t sz = std::max(sizeof(T), sizeof(void*));
        this->data = (T*) GBE_ALIGNED_MALLOC(elemNum * sz, ALIGNOF(T));
        this->next = NULL;
        this->maxElemNum = elemNum;
        this->allocated = 0;
      }
      ~GrowingPoolElem(void) {
        GBE_ALIGNED_FREE(this->data);
        if (this->next) GBE_DELETE(this->next);
      }
      T *data;
      GrowingPoolElem *next;
      size_t allocated, maxElemNum;
    };
    GrowingPoolElem *curr; //!< To get new element from
    GrowingPoolElem *free; //!< Blocks that can be reused (after rewind)
    GrowingPoolElem *full; //!< Blocks fully used
    void *freeList;        //!< Elements that have been deallocated
    GBE_CLASS(GrowingPool);
  };

/*! Helper macros to build and destroy objects with a growing pool: declares
 *  the pool POOL plus newTYPE(args...), which constructs a TYPE in storage
 *  taken from the pool, and deleteTYPE(ptr), which runs the destructor and
 *  gives the storage back.
 *  NOTE(review): ptr is dereferenced without a NULL check in deleteTYPE
 */
#define DECL_POOL(TYPE, POOL) \
  GrowingPool POOL; \
  template \
  TYPE *new##TYPE(Args&&... args) { \
    return new (POOL.allocate()) TYPE(args...); \
  } \
  void delete##TYPE(TYPE *ptr) { \
    ptr->~TYPE(); \
    POOL.deallocate(ptr); \
  }

  /*! A linear allocator just grows and does not reuse freed memory. It can
   *  however allocate objects of any size
   */
  class LinearAllocator
  {
  public:
    /*! Initiate the linear allocator (one segment is allocated) */
    LinearAllocator(size_t minSize = CACHE_LINE, size_t maxSize = 64*KB);
    /*! Free up everything */
    ~LinearAllocator(void);
    /*! Allocate size bytes */
    void *allocate(size_t size);
    /*! Nothing here, except with GBE_DEBUG_SPECIAL_ALLOCATOR where a non-NULL
     *  ptr is released with GBE_ALIGNED_FREE
     */
    INLINE void deallocate(void *ptr) {
#if GBE_DEBUG_SPECIAL_ALLOCATOR
      if (ptr) GBE_ALIGNED_FREE(ptr);
#endif /* GBE_DEBUG_SPECIAL_ALLOCATOR */
    }
  private:
    /*! Holds an allocated segment of memory */
    struct Segment {
      /*! Allocate a new segment */
      Segment(size_t size);
      /*! Destroy the segment and the next ones */
      ~Segment(void);
      /* Size of the segment */
      size_t size;
      /*! Offset to the next free bytes (if any left) */
      size_t offset;
      /*! Pointer to valid data */
      void  *data;
      /*! Pointer to the next segment */
      Segment *next;
      /*! Use internal allocator */
      GBE_STRUCT(Segment);
    };
    /*! Points to the current segment we can allocate from */
    Segment *curr;
    /*! Maximum segment size */
    size_t maxSize;
    /*! Use internal allocator */
    GBE_CLASS(LinearAllocator);
  };

} /* namespace gbe */

#endif /* __GBE_ALLOC_HPP__ */

Release_v0.3/backend/src/sys/assert.cpp000066400000000000000000000044631223142177000202130ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
* * Author: Benjamin Segovia */ /** * \file assert.cpp * \author Benjamin Segovia */ #if GBE_COMPILE_UTESTS #include "sys/assert.hpp" #include "sys/exception.hpp" #include "sys/cvar.hpp" #include #include namespace gbe { BVAR(OCL_BREAK_POINT_IN_ASSERTION, false); BVAR(OCL_ABORT_IN_ASSERTION, false); void onFailedAssertion(const char *msg, const char *file, const char *fn, int line) { char lineString[256]; sprintf(lineString, "%i", line); assert(msg != NULL && file != NULL && fn != NULL); const std::string str = "Compiler error: " + std::string(msg) + "\n at file " + std::string(file) + ", function " + std::string(fn) + ", line " + std::string(lineString); if (OCL_BREAK_POINT_IN_ASSERTION) DEBUGBREAK(); if (OCL_ABORT_IN_ASSERTION) { assert(false); exit(-1); } throw Exception(str); } } /* namespace gbe */ #else #include "sys/assert.hpp" #include "sys/exception.hpp" #include "sys/platform.hpp" #include #include #include namespace gbe { void onFailedAssertion(const char *msg, const char *file, const char *fn, int32_t line) { assert(msg != NULL && file != NULL && fn != NULL); fprintf(stderr, "ASSERTION FAILED: %s\n" " at file %s, function %s, line %i\n", msg, file, fn, line); fflush(stdout); DEBUGBREAK(); _exit(-1); } } /* namespace gbe */ #endif /* GBE_COMPILE_UTESTS */ Release_v0.3/backend/src/sys/assert.hpp000066400000000000000000000021761223142177000202170ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

/**
 * \file assert.hpp
 *
 * \author Benjamin Segovia
 */
#ifndef __GBE_ASSERT_HPP__
#define __GBE_ASSERT_HPP__

namespace gbe
{
  /*! Called when an assertion does not hold. msg describes the failed
   *  condition; file, fn and line locate it. All three strings must be
   *  non-NULL. The implementation in assert.cpp does not return normally: it
   *  either throws (unit test build) or terminates the process
   */
  void onFailedAssertion(const char *msg, const char *file, const char *fn, int line);
} /* namespace gbe */

#endif /* __GBE_ASSERT_HPP__ */

Release_v0.3/backend/src/sys/atomic.hpp000066400000000000000000000041531223142177000201670ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
* */

#ifndef __GBE_ATOMIC_HPP__
#define __GBE_ATOMIC_HPP__

#include "sys/intrinsics.hpp"

namespace gbe
{
  /*! Integer wrapper whose arithmetic operators go through atomic_add and
   *  atomic_cmpxchg from sys/intrinsics.hpp. Copying is disabled (declared,
   *  not implemented).
   *  NOTE(review): the template parameter list here and the template
   *  arguments of the two typedefs at the bottom are missing, apparently
   *  extraction damage -- restore from the pristine file
   */
  template
  struct AtomicInternal {
  protected:
    AtomicInternal(const AtomicInternal&); // don't implement
    AtomicInternal& operator= (const AtomicInternal&); // don't implement

  public:
    /*! Leaves the value uninitialized */
    INLINE AtomicInternal(void) {}
    INLINE AtomicInternal(T data) : data(data) {}
    /*! Plain store of input */
    INLINE AtomicInternal& operator =(const T input) { data = input; return *this; }
    /*! Plain load of the current value */
    INLINE operator T() const { return data; }
    INLINE void storeRelease(T x) { __store_release(&data, x); }
  public:
    // The pre-increment style operators add the operand to the result of
    // atomic_add and the post-increment style ones return it unchanged; this
    // gives new / old value semantics assuming atomic_add returns the value
    // held before the addition -- TODO confirm against intrinsics.hpp for
    // every platform
    INLINE friend T operator+= (AtomicInternal& value, T input) { return atomic_add(&value.data, input) + input; }
    INLINE friend T operator++ (AtomicInternal& value) { return atomic_add(&value.data,  1) + 1; }
    INLINE friend T operator-- (AtomicInternal& value) { return atomic_add(&value.data, -1) - 1; }
    INLINE friend T operator++ (AtomicInternal& value, int) { return atomic_add(&value.data,  1); }
    INLINE friend T operator-- (AtomicInternal& value, int) { return atomic_add(&value.data, -1); }
    /*! Forwards to atomic_cmpxchg with the arguments in the same order */
    INLINE friend T cmpxchg    (AtomicInternal& value, const T v, const T c) { return atomic_cmpxchg(&value.data,v,c); }

  private:
    volatile T data;
    GBE_STRUCT(AtomicInternal);
  };

  typedef AtomicInternal Atomic32;
  typedef AtomicInternal Atomic;
}

#endif /* __GBE_ATOMIC_HPP__ */

Release_v0.3/backend/src/sys/cvar.cpp000066400000000000000000000034061223142177000196410ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
* * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file cvar.cpp * \author Benjamin Segovia */ #include "sys/cvar.hpp" #include namespace gbe { CVarInit::CVarInit(const char *name, int32_t *addr, int32_t imin, int32_t i, int32_t imax) : varType(CVarInit::INTEGER) { this->i.min = imin; this->i.max = imax; const char *env = getenv(name); if (env != NULL) { sscanf(env, "%i", &i); i = std::min(imax, std::max(imin, i)); } *addr = i; } CVarInit::CVarInit(const char *name, float *addr, float fmin, float f, float fmax) : varType(CVarInit::FLOAT) { this->f.min = fmin; this->f.max = fmax; const char *env = getenv(name); if (env != NULL) { sscanf(env, "%f", &f); f = std::min(fmax, std::max(fmin, f)); } *addr = f; } CVarInit::CVarInit(const char *name, std::string *str, const std::string &v) : varType(CVarInit::STRING) { const char *env = getenv(name); *str = env != NULL ? env : v; } } /* namespace gbe */ Release_v0.3/backend/src/sys/cvar.hpp000066400000000000000000000053161223142177000176500ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file cvar.hpp * \author Benjamin Segovia * * Quake like console variable system. 
Just use the environment variables from
 * the console to change their value
 */

#ifndef __GBE_CVAR_HPP__
#define __GBE_CVAR_HPP__

#include "sys/platform.hpp"

namespace gbe
{
  /*! A CVar is either a float, an integer or a string value. CVarInit is only
   *  here to set the global variable in pre-main
   */
  class CVarInit
  {
  public:
    enum {
      STRING = 0,
      INTEGER = 1,
      FLOAT = 2
    };
    /*! Build a CVar from an integer environment variable: name is the
     *  environment variable, addr the variable to set, i the default and
     *  [imin, imax] the accepted range
     */
    explicit CVarInit(const char *name, int32_t *addr, int32_t imin, int32_t i, int32_t imax);
    /*! Build a CVar from a float environment variable (same layout as the
     *  integer flavour)
     */
    explicit CVarInit(const char *name, float *addr, float fmin, float f, float fmax);
    /*! Build a CVar from a string environment variable: str is the variable
     *  to set and v the default
     */
    explicit CVarInit(const char *name, std::string *str, const std::string &v);
    int varType;      //!< STRING, INTEGER or FLOAT
    std::string *str; //!< string variable
    union {
      struct { int32_t min, *curr, max; } i; //!< integer variables with bounds
      struct { float   min, *curr, max; } f; //!< float variables with bounds
    };
  };
} /* namespace gbe */

// Each macro below defines the variable itself plus a static CVarInit object
// whose constructor sets the variable before main.
// NOTE(review): the operands of ## are not macro-expanded, so __LINE__ is
// pasted as the literal text "__LINE__" and not as the line number; the
// helper object name is therefore unique per NAME only

/*! Declare an integer console variable */
#define IVAR(NAME, MIN, CURR, MAX) \
  int32_t NAME; \
  static gbe::CVarInit __CVAR##NAME##__LINE__##__(#NAME, &NAME, int32_t(MIN), int32_t(CURR), int32_t(MAX));

/*! Declare a float console variable */
#define FVAR(NAME, MIN, CURR, MAX) \
  float NAME; \
  static gbe::CVarInit __CVAR##NAME##__LINE__##__(#NAME, &NAME, float(MIN), float(CURR), float(MAX));

/*! Declare a string console variable */
#define SVAR(NAME, STR) \
  std::string NAME; \
  static gbe::CVarInit __CVAR##NAME##__LINE__##__(#NAME, &NAME, STR);

/*! Declare a Boolean variable (just an integer in {0,1}) */
#define BVAR(NAME, CURR) IVAR(NAME, 0, CURR ? 1 : 0, 1)

#endif /* __GBE_CVAR_HPP__ */

Release_v0.3/backend/src/sys/exception.hpp000066400000000000000000000031111223142177000207020ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

/**
 * \file exception.hpp
 *
 * \author Benjamin Segovia
 */

#ifndef __GBE_EXCEPTION_HPP__
#define __GBE_EXCEPTION_HPP__

#if GBE_COMPILE_UTESTS

// NOTE(review): both #include targets are missing (extraction damage);
// std::exception and std::string are used below
#include
#include

namespace gbe
{
  /*! Exception are only used while using unit tests. The object owns a copy
   *  of the message; what() returns a pointer into that copy
   */
  class Exception : public std::exception
  {
  public:
    Exception(const std::string &msg) throw() : msg(msg) {}
    Exception(const Exception &other) throw() : msg(other.msg) {}
    ~Exception(void) throw() {}
    Exception &operator= (const Exception &other) throw() {
      this->msg = other.msg;
      return *this;
    }
    const char *what(void) const throw() { return msg.c_str(); }
  private:
    std::string msg; //!< String message
  };

} /* namespace gbe */

#endif /* GBE_COMPILE_UTESTS */
#endif /* __GBE_EXCEPTION_HPP__ */

Release_v0.3/backend/src/sys/fixed_array.hpp000066400000000000000000000053521223142177000212120ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

/**
 * \file fixed_array.hpp
 *
 * \author Benjamin Segovia
 */

#ifndef __GBE_FIXED_ARRAY_HPP__
#define __GBE_FIXED_ARRAY_HPP__

// NOTE(review): the second #include target and the template parameter list
// of fixed_array are missing (extraction damage). T is the element type and
// N the element count in the code below
#include "platform.hpp"
#include

namespace gbe
{
  /*! Regular C array but with bound checks: operator[] asserts the index
   *  through GBE_ASSERT, the other accessors do not check anything
   */
  template
  class fixed_array
  {
  public:
    /*! Do not initialize the data */
    fixed_array(void) {}
    /*! Copy the input array. The copy is a raw memcpy of N * sizeof(T)
     *  bytes, so array must point to at least N elements
     */
    fixed_array(const T array[N]) { std::memcpy(elem, array, N * sizeof(T)); }
    /*! First element (non const) */
    T* begin(void) { return &elem[0]; }
    /*! First non-valid element (non const) */
    T* end(void) { return begin() + N; }
    /*! First element (const) */
    const T* begin(void) const { return &elem[0]; }
    /*! First non-valid element (const) */
    const T* end(void) const { return begin() + N; }
    /*! Number of elements in the array */
    size_t size(void) const { return N; }
    /*! Get the pointer to the data (non-const) */
    T* data(void) { return &elem[0]; }
    /*! Get the pointer to the data (const) */
    const T* data(void) const { return &elem[0]; }
    /*! First element (const) */
    const T& front(void) const { return *begin(); }
    /*! Last element (const) */
    const T& back(void) const { return *(end() - 1); }
    /*! First element (non-const) */
    T& front(void) { return *begin(); }
    /*! Last element (non-const) */
    T& back(void) { return *(end() - 1); }
    /*! Get element at position index (with bound check) */
    INLINE T& operator[] (size_t index) {
      GBE_ASSERT(index < size());
      return elem[index];
    }
    /*! Get element at position index (with bound check) */
    INLINE const T& operator[] (size_t index) const {
      GBE_ASSERT(index < size());
      return elem[index];
    }
  private:
    T elem[N];            //!< Store the elements
    STATIC_ASSERT(N > 0); //!< zero element is not allowed
    GBE_CLASS(fixed_array);
  };

} /* namespace gbe */

#endif /* __GBE_FIXED_ARRAY_HPP__ */

Release_v0.3/backend/src/sys/hash_map.hpp000066400000000000000000000051571223142177000205000ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

/**
 * \file hash_map.hpp
 *
 * \author Benjamin Segovia
 */

#ifndef __GBE_HASH_MAP_HPP__
#define __GBE_HASH_MAP_HPP__

#include "sys/platform.hpp"

// NOTE(review): both #include targets, most of the template header of
// hash_map and the template arguments inside its typedefs are missing
// (extraction damage). Key, T, Hash and Pred are the names the surviving
// typedefs refer to -- restore the declaration from the pristine file
#ifdef __MSVC__
#include
#else
#include
#endif /* __MSVC__ */

namespace gbe
{
  /*! Add specific allocator to the hash map. The class only forwards its
   *  constructors to the parent container and is non copyable
   */
  template ,
            class Pred = std::equal_to>
  class hash_map : public std::tr1::unordered_map>>,
                   public NonCopyable
  {
  public:
    // Typedefs
    typedef std::pair value_type;
    typedef Allocator allocator_type;
    typedef std::tr1::unordered_map parent_type;
    typedef typename allocator_type::size_type size_type;
    typedef Key key_type;
    typedef T mapped_type;
    typedef Hash hasher;
    typedef Pred key_equal;

    /*! Default constructor: n is handed to the parent as its first
     *  constructor argument (3 by default)
     */
    INLINE explicit hash_map(size_type n = 3,
                             const hasher& hf = hasher(),
                             const key_equal& eql = key_equal(),
                             const allocator_type& a = allocator_type()) :
      parent_type(n, hf, eql, a) {}
    /*! Iteration constructor: also forwards the range [first, last) to the
     *  parent
     */
    template
    INLINE hash_map(InputIterator first,
                    InputIterator last,
                    size_type n = 3,
                    const hasher& hf = hasher(),
                    const key_equal& eql = key_equal(),
                    const allocator_type& a = allocator_type()) :
      parent_type(first,last,n,hf,eql,a) {}
    // The copy constructor below is compiled out
#if 0
    /*! Copy constructor */
    INLINE hash_map(const hash_map &other) : parent_type(other) {}
#endif
    GBE_CLASS(hash_map);
  };
} /* namespace gbe */

#endif /* __GBE_HASH_MAP_HPP__ */

Release_v0.3/backend/src/sys/intrinsics.hpp000066400000000000000000000134641223142177000211050ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
* */ #ifndef __GBE_INTRINSICS_HPP__ #define __GBE_INTRINSICS_HPP__ #include "sys/platform.hpp" #include #include /*! MSVC branch: the helpers below wrap compiler intrinsics */ #if defined(__MSVC__) #include #define GBE_COMPILER_WRITE_BARRIER _WriteBarrier() #define GBE_COMPILER_READ_WRITE_BARRIER _ReadWriteBarrier() #if _MSC_VER >= 1400 #pragma intrinsic(_ReadBarrier) #define GBE_COMPILER_READ_BARRIER _ReadBarrier() #else #define GBE_COMPILER_READ_BARRIER _ReadWriteBarrier() #endif /* _MSC_VER >= 1400 */ /*! Bit scan and bit test helpers built on the _BitScan* and _bittestand* intrinsics */ INLINE int __bsf(int v) { unsigned long r = 0; _BitScanForward(&r,v); return r; } INLINE int __bsr(int v) { unsigned long r = 0; _BitScanReverse(&r,v); return r; } INLINE int __btc(int v, int i) { long r = v; _bittestandcomplement(&r,i); return r; } INLINE int __bts(int v, int i) { long r = v; _bittestandset(&r,i); return r; } INLINE int __btr(int v, int i) { long r = v; _bittestandreset(&r,i); return r; } INLINE void memoryFence(void) { _mm_mfence(); } /*! 64-bit overloads. NOTE(review): __int64_t is used for the temporaries below - confirm that MSVC provides this type (its documented 64-bit type is __int64) */ #if defined(__X86_64__) && !defined(__INTEL_COMPILER) INLINE size_t __bsf(size_t v) { unsigned long r = 0; _BitScanForward64(&r,v); return r; } INLINE size_t __bsr(size_t v) { unsigned long r = 0; _BitScanReverse64(&r,v); return r; } INLINE size_t __btc(size_t v, size_t i) { __int64_t r = v; _bittestandcomplement64(&r,i); return r; } INLINE size_t __bts(size_t v, size_t i) { __int64_t r = v; _bittestandset64(&r,i); return r; } INLINE size_t __btr(size_t v, size_t i) { __int64_t r = v; _bittestandreset64(&r,i); return r; } #endif /* defined(__X86_64__) && !defined(__INTEL_COMPILER) */ /*! 
Atomic add and compare-exchange built on the _Interlocked* intrinsics */ typedef int32_t atomic32_t; INLINE int32_t atomic_add(volatile int32_t* m, const int32_t v) { return _InterlockedExchangeAdd((volatile long*)m,v); } INLINE int32_t atomic_cmpxchg(volatile int32_t* m, const int32_t v, const int32_t c) { return _InterlockedCompareExchange((volatile long*)m,v,c); } #if defined(__X86_64__) typedef int64_t atomic_t; INLINE int64_t atomic_add(volatile int64_t* m, const int64_t v) { return _InterlockedExchangeAdd64(m,v); } INLINE int64_t atomic_cmpxchg(volatile int64_t* 
m, const int64_t v, const int64_t c) { return _InterlockedCompareExchange64(m,v,c); } #else typedef int32_t atomic_t; #endif /* defined(__X86_64__) */ #else /*! Other compilers: the same helpers written as x86 inline assembly */ INLINE unsigned int __popcnt(unsigned int in) { int r = 0; asm ("popcnt %1,%0" : "=r"(r) : "r"(in)); return r; } INLINE int __bsf(int v) { int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; } INLINE int __bsr(int v) { int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; } INLINE int __btc(int v, int i) { int r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; } INLINE int __bts(int v, int i) { int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; } INLINE int __btr(int v, int i) { int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; } INLINE size_t __bsf(size_t v) { size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; } INLINE size_t __bsr(size_t v) { size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; } INLINE size_t __btc(size_t v, size_t i) { size_t r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; } INLINE size_t __bts(size_t v, size_t i) { size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; } INLINE size_t __btr(size_t v, size_t i) { size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; } INLINE void memoryFence(void) { _mm_mfence(); } /*! 
Atomics written with lock-prefixed xadd and cmpxchg; each function returns its register operand as updated by the instruction */ typedef int32_t atomic32_t; INLINE int32_t atomic_add(int32_t volatile* value, int32_t input) { asm volatile("lock xadd %0,%1" : "+r" (input), "+m" (*value) : "r" (input), "m" (*value)); return input; } INLINE int32_t atomic_cmpxchg(int32_t volatile* value, const int32_t input, int32_t comparand) { asm volatile("lock cmpxchg %2,%0" : "=m" (*value), "=a" (comparand) : "r" (input), "m" (*value), "a" (comparand) : "flags"); return comparand; } #if defined(__X86_64__) typedef int64_t atomic_t; INLINE int64_t atomic_add(int64_t volatile* value, int64_t input) { asm volatile("lock xaddq %0,%1" : "+r" 
(input), "+m" (*value) : "r" (input), "m" (*value)); return input; } INLINE int64_t atomic_cmpxchg(int64_t volatile* value, const int64_t input, int64_t comparand) { asm volatile("lock cmpxchgq %2,%0" : "+m" (*value), "+a" (comparand) : "r" (input), "m" (*value), "r" (comparand) : "flags"); return comparand; } #else typedef int32_t atomic_t; #endif /* defined(__X86_64__) */ /*! Compiler barrier: empty asm statement with a memory clobber. NOTE(review): this definition already ends with a semicolon, unlike the MSVC one */ #define GBE_COMPILER_READ_WRITE_BARRIER asm volatile("" ::: "memory"); #define GBE_COMPILER_WRITE_BARRIER GBE_COMPILER_READ_WRITE_BARRIER #define GBE_COMPILER_READ_BARRIER GBE_COMPILER_READ_WRITE_BARRIER #endif /* __MSVC__ */ /*! Read *ptr with a compiler barrier before and after the access */ template INLINE T __load_acquire(volatile T *ptr) { GBE_COMPILER_READ_WRITE_BARRIER; T x = *ptr; // for x86, load == load_acquire GBE_COMPILER_READ_WRITE_BARRIER; return x; } /*! Write x to *ptr with a compiler barrier before and after the access */ template INLINE void __store_release(volatile T *ptr, T x) { GBE_COMPILER_READ_WRITE_BARRIER; *ptr = x; // for x86, store == store_release GBE_COMPILER_READ_WRITE_BARRIER; } #endif /* __GBE_INTRINSICS_HPP__ */ Release_v0.3/backend/src/sys/intrusive_list.cpp000066400000000000000000000043621223142177000217730ustar00rootroot00000000000000/* * Copyright (c) 2007 Maciej Sinilo * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ #include "intrusive_list.hpp" namespace gbe { intrusive_list_base::intrusive_list_base() : m_root() {} intrusive_list_base::size_type intrusive_list_base::size() const { size_type numNodes(0); const intrusive_list_node* iter = &m_root; do { iter = iter->next; ++numNodes; } while (iter != &m_root); return numNodes - 1; } void append(intrusive_list_node *node, intrusive_list_node *prev) { GBE_ASSERT(!node->in_list()); node->next = prev->next; node->next->prev = node; prev->next = node; node->prev = prev; } void prepend(intrusive_list_node *node, intrusive_list_node *next) { GBE_ASSERT(!node->in_list()); node->prev = next->prev; node->prev->next = node; next->prev = node; node->next = next; } void link(intrusive_list_node* node, intrusive_list_node* nextNode) { prepend(node, nextNode); } void unlink(intrusive_list_node* node) { GBE_ASSERT(node->in_list()); node->prev->next = node->next; node->next->prev = node->prev; node->next = node->prev = node; } } /* namespace gbe */ Release_v0.3/backend/src/sys/intrusive_list.hpp000066400000000000000000000140751223142177000220020ustar00rootroot00000000000000/* * Copyright (c) 2007 Maciej Sinilo * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ #ifndef __GBE_INTRUSIVE_LIST_HPP__ #define __GBE_INTRUSIVE_LIST_HPP__ #include "sys/platform.hpp" namespace gbe { /*! List elements must inherit from it. A freshly built node points to itself, which is the state in_list() reports as not being in a list */ struct intrusive_list_node { INLINE intrusive_list_node(void) { next = prev = this; } INLINE bool in_list(void) const { return this != next; } intrusive_list_node *next; intrusive_list_node *prev; }; /*! Insert node such that prev -> node */ void append(intrusive_list_node *node, intrusive_list_node *prev); /*! Insert node such that node -> next */ void prepend(intrusive_list_node *node, intrusive_list_node *next); /*! Same as prepend */ void link(intrusive_list_node* node, intrusive_list_node* nextNode); /*! 
Remove the node from its current list */ void unlink(intrusive_list_node* node); /*! Iterator that moves along the next / prev links of the nodes; operator* asserts that the node pointer is not null */ template class intrusive_list_iterator { public: typedef Pointer pointer; typedef Reference reference; INLINE intrusive_list_iterator(void): m_node(0) {} INLINE intrusive_list_iterator(Pointer iterNode) : m_node(iterNode) {} INLINE Reference operator*(void) const { GBE_ASSERT(m_node); return *m_node; } INLINE Pointer operator->(void) const { return m_node; } INLINE Pointer node(void) const { return m_node; } INLINE intrusive_list_iterator& operator++(void) { m_node = static_cast(m_node->next); return *this; } INLINE intrusive_list_iterator& operator--(void) { m_node = static_cast(m_node->prev); return *this; } INLINE intrusive_list_iterator operator++(int) { intrusive_list_iterator copy(*this); ++(*this); return copy; } INLINE intrusive_list_iterator operator--(int) { intrusive_list_iterator copy(*this); --(*this); return copy; } INLINE bool operator== (const intrusive_list_iterator& rhs) const { return rhs.m_node == m_node; } INLINE bool operator!= (const intrusive_list_iterator& rhs) const { return !(rhs == *this); } private: Pointer m_node; }; /*! Type-independent part of the list: holds the root node m_root. The list is empty when m_root is not linked to any other node. Copy constructor and assignment are declared private */ class intrusive_list_base { public: typedef size_t size_type; INLINE void pop_back(void) { unlink(m_root.prev); } INLINE void pop_front(void) { unlink(m_root.next); } INLINE bool empty(void) const { return !m_root.in_list(); } size_type size(void) const; protected: intrusive_list_base(void); INLINE ~intrusive_list_base(void) {} intrusive_list_node m_root; private: intrusive_list_base(const intrusive_list_base&); intrusive_list_base& operator=(const intrusive_list_base&); }; /*! Typed list of T nodes. 
The constructor body only checks at compile time that a T pointer converts to an intrusive_list_node pointer */ template class intrusive_list : public intrusive_list_base { public: typedef T node_type; typedef T value_type; typedef intrusive_list_iterator iterator; typedef intrusive_list_iterator const_iterator; intrusive_list(void) : intrusive_list_base() { intrusive_list_node* testNode((T*)0); static_cast(sizeof(testNode)); } void push_back(value_type* v) { link(v, &m_root); } void 
push_front(value_type* v) { link(v, m_root.next); } iterator begin(void) { return iterator(upcast(m_root.next)); } iterator end(void) { return iterator(upcast(&m_root)); } /*! NOTE(review): rbegin / rend return the same iterator type as begin / end, so operator++ on them still follows the next link */ iterator rbegin(void) { return iterator(upcast(m_root.prev)); } iterator rend(void) { return iterator(upcast(&m_root)); } const_iterator begin(void) const { return const_iterator(upcast(m_root.next)); } const_iterator end(void) const { return const_iterator(upcast(&m_root)); } const_iterator rbegin(void) const { return const_iterator(upcast(m_root.prev)); } const_iterator rend(void) const { return const_iterator(upcast(&m_root)); } INLINE value_type* front(void) { return upcast(m_root.next); } INLINE value_type* back(void) { return upcast(m_root.prev); } INLINE const value_type* front(void) const { return upcast(m_root.next); } INLINE const value_type* back(void) const { return upcast(m_root.prev); } /*! Link v right before pos and return an iterator on v */ iterator insert(iterator pos, value_type* v) { link(v, pos.node()); return iterator(v); } /*! Unlink the node at it and return an iterator on the node that followed it */ iterator erase(iterator it) { iterator itErase(it); ++it; unlink(itErase.node()); return it; } iterator erase(iterator first, iterator last) { while (first != last) first = erase(first); return first; } /*! Unlink every node one by one */ void clear(void) { erase(begin(), end()); } /*! 
Reset the root node only; the nodes that were linked are not modified */ void fast_clear(void) { m_root.next = m_root.prev = &m_root; } static void remove(value_type* v) { unlink(v); } private: static INLINE node_type* upcast(intrusive_list_node* n) { return static_cast(n); } static INLINE const node_type* upcast(const intrusive_list_node* n) { return static_cast(n); } }; } /* namespace gbe */ #endif /* __GBE_INTRUSIVE_LIST_HPP__ */ Release_v0.3/backend/src/sys/list.hpp000066400000000000000000000040211223142177000176600ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file list.hpp * * \author Benjamin Segovia */ #ifndef __GBE_LIST_HPP__ #define __GBE_LIST_HPP__ #include "sys/platform.hpp" #include namespace gbe { /*! std::list that uses the project Allocator instead of the std one; every constructor only forwards its arguments to the parent class */ template class list : public std::list> { public: // Typedefs typedef T value_type; typedef Allocator allocator_type; typedef std::list parent_type; typedef typename allocator_type::size_type size_type; /*! Default constructor */ INLINE explicit list(const allocator_type &a = allocator_type()) : parent_type(a) {} /*! Fill constructor: n copies of value */ INLINE explicit list(size_type n, const T &value = T(), const allocator_type &a = allocator_type()) : parent_type(n, value, a) {} /*! Range constructor: elements taken from the two iterators */ template INLINE list(InputIterator first, InputIterator last, const allocator_type &a = allocator_type()) : parent_type(first, last, a) {} /*! Copy constructor */ INLINE list(const list &x) : parent_type(x) {} GBE_CLASS(list); }; } /* namespace gbe */ #endif /* __GBE_LIST_HPP__ */ Release_v0.3/backend/src/sys/map.hpp000066400000000000000000000047461223142177000175000ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file map.hpp * * \author Benjamin Segovia */ #ifndef __GBE_MAP_HPP__ #define __GBE_MAP_HPP__ #include "sys/platform.hpp" #include namespace gbe { /*! std::map that uses the project Allocator instead of the std one; it also derives from NonCopyable */ template> class map : public std::map>>, public NonCopyable { public: // Typedefs typedef std::pair value_type; typedef Allocator allocator_type; typedef std::map parent_type; typedef Key key_type; typedef T mapped_type; typedef Pred key_compare; typedef typename allocator_type::pointer pointer; typedef typename allocator_type::const_pointer const_pointer; typedef typename allocator_type::reference reference; typedef typename allocator_type::const_reference const_reference; /*! Default constructor */ INLINE map(const key_compare &comp = key_compare(), const allocator_type &a = allocator_type()) : parent_type(comp, a) {} /*! Range constructor: elements taken from the two iterators */ template INLINE map(InputIterator first, InputIterator last, const key_compare &comp = key_compare(), const allocator_type& a = allocator_type()) : parent_type(first, last, comp, a) {} #if 0 /*! Copy constructor (compiled out by the surrounding #if 0) */ INLINE map(const map& x) : parent_type(x) {} #endif /*! True when key is present, i.e. find(key) != end(). Better than using find if we do not care about the iterator itself */ INLINE bool contains(const Key &key) const { return this->find(key) != this->end(); } GBE_CLASS(map); }; } /* namespace gbe */ #endif /* __GBE_MAP_HPP__ */ Release_v0.3/backend/src/sys/mutex.cpp000066400000000000000000000033701223142177000200500ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #include "sys/mutex.hpp" #if defined(__WIN32__) #define WIN32_LEAN_AND_MEAN #include namespace gbe { /*! system mutex using windows API */ MutexSys::MutexSys( void ) { mutex = new CRITICAL_SECTION; InitializeCriticalSection((CRITICAL_SECTION*)mutex); } MutexSys::~MutexSys( void ) { DeleteCriticalSection((CRITICAL_SECTION*)mutex); delete ((CRITICAL_SECTION*)mutex); } void MutexSys::lock( void ) { EnterCriticalSection((CRITICAL_SECTION*)mutex); } void MutexSys::unlock( void ) { LeaveCriticalSection((CRITICAL_SECTION*)mutex); } } #endif #if defined(__UNIX__) #include namespace gbe { /*! system mutex using pthreads */ MutexSys::MutexSys( void ) { mutex = new pthread_mutex_t; pthread_mutex_init((pthread_mutex_t*)mutex, NULL); } MutexSys::~MutexSys( void ) { pthread_mutex_destroy((pthread_mutex_t*)mutex); delete ((pthread_mutex_t*)mutex); } void MutexSys::lock( void ) { pthread_mutex_lock((pthread_mutex_t*)mutex); } void MutexSys::unlock( void ) { pthread_mutex_unlock((pthread_mutex_t*)mutex); } } #endif Release_v0.3/backend/src/sys/mutex.hpp000066400000000000000000000041531223142177000200550ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #ifndef __GBE_MUTEX_HPP__ #define __GBE_MUTEX_HPP__ #include "platform.hpp" #include "atomic.hpp" #include namespace gbe { /*! System mutex; the state is kept behind the opaque void pointer named mutex. Copy constructor and assignment are declared but not implemented */ class MutexSys { friend class ConditionSys; public: MutexSys(void); ~MutexSys(void); void lock(void); void unlock(void); protected: void* mutex; MutexSys(const MutexSys&); // don't implement MutexSys& operator= (const MutexSys&); // don't implement GBE_CLASS(MutexSys); }; /*! active mutex: lock() retries cmpxchg(_lock, LOCK_IS_TAKEN, LOCK_IS_FREE) until it returns LOCK_IS_FREE, calling _mm_pause() between attempts; unlock() stores LOCK_IS_FREE with storeRelease */ class MutexActive { public: INLINE MutexActive(void) : _lock(LOCK_IS_FREE) {} INLINE void lock(void) { GBE_COMPILER_READ_BARRIER; while (cmpxchg(_lock, LOCK_IS_TAKEN, LOCK_IS_FREE) != LOCK_IS_FREE) _mm_pause(); GBE_COMPILER_READ_BARRIER; } INLINE void unlock(void) { _lock.storeRelease(LOCK_IS_FREE); } protected: enum { LOCK_IS_FREE = 0, LOCK_IS_TAKEN = 1 }; Atomic _lock; MutexActive(const MutexActive&); // don't implement MutexActive& operator=(const MutexActive&); // don't implement GBE_CLASS(MutexActive); }; /*! safe mutex lock and unlock helper: the constructor locks the mutex and the destructor unlocks it */ template class Lock { public: Lock (Mutex& mutex) : mutex(mutex) { mutex.lock(); } ~Lock() { mutex.unlock(); } protected: Mutex& mutex; Lock(const Lock&); // don't implement Lock& operator= (const Lock&); // don't implement GBE_CLASS(Lock); }; } #endif /* __GBE_MUTEX_HPP__ */ Release_v0.3/backend/src/sys/platform.cpp000066400000000000000000000040641223142177000205330ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #include "sys/platform.hpp" #include "sys/intrinsics.hpp" #include //////////////////////////////////////////////////////////////////////////////// /// Windows Platform //////////////////////////////////////////////////////////////////////////////// #ifdef __WIN32__ #define WIN32_LEAN_AND_MEAN #include namespace gbe { double getSeconds() { LARGE_INTEGER freq, val; QueryPerformanceFrequency(&freq); QueryPerformanceCounter(&val); return (double)val.QuadPart / (double)freq.QuadPart; } void FATAL(const std::string &msg) { std::cerr << msg << std::endl; MessageBox(NULL, msg.c_str(), "Fatal Error", MB_OK | MB_ICONEXCLAMATION); GBE_ASSERT(0); #ifdef __GNUC__ exit(-1); #else _exit(-1); #endif /* __GNUC__ */ } } /* namespace gbe */ #endif /* __WIN32__ */ //////////////////////////////////////////////////////////////////////////////// /// Unix Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__UNIX__) #include #include namespace gbe { double getSeconds() { struct timeval tp; gettimeofday(&tp,NULL); return double(tp.tv_sec) + double(tp.tv_usec)/1E6; } void FATAL(const std::string &msg) { std::cerr << msg << std::endl; GBE_ASSERT(0); _exit(-1); } } /* namespace gbe */ #endif /* __UNIX__ */ Release_v0.3/backend/src/sys/platform.hpp000066400000000000000000000303441223142177000205400ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the 
License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __GBE_PLATFORM_HPP__ #define __GBE_PLATFORM_HPP__ #include #include #include #include #include #include #include #include #include //////////////////////////////////////////////////////////////////////////////// /// CPU architecture //////////////////////////////////////////////////////////////////////////////// /* detect 32 or 64 bit platform: __X86_64__ is defined when one of the 64-bit compiler macros tested here is defined, __X86__ otherwise */ #if defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) #define __X86_64__ #else #define __X86__ #endif /* We require SSE: __SSE__ is defined here when the compiler did not define it ... */ #ifndef __SSE__ #define __SSE__ #endif /* ... and SSE2 */ #ifndef __SSE2__ #define __SSE2__ #endif #if defined(_INCLUDED_IMM) // #define __AVX__ #endif #if defined(_MSC_VER) && (_MSC_VER < 1600) && !defined(__INTEL_COMPILER) || defined(_DEBUG) && defined(_WIN32) #define __NO_AVX__ #endif #if defined(_MSC_VER) && !defined(__SSE4_2__) // #define __SSE4_2__ //! 
activates SSE4.2 support #endif //////////////////////////////////////////////////////////////////////////////// /// Operating system //////////////////////////////////////////////////////////////////////////////// /* detect Linux platform (defines __LINUX__ and __UNIX__) */ #if defined(linux) || defined(__linux__) || defined(__LINUX__) # if !defined(__LINUX__) # define __LINUX__ # endif # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* detect FreeBSD platform (defines __FREEBSD__ and __UNIX__) */ #if defined(__FreeBSD__) || defined(__FREEBSD__) # if !defined(__FREEBSD__) # define __FREEBSD__ # endif # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* detect Windows 95/98/NT/2000/XP/Vista/7 platform (defines __WIN32__; not taken when __CYGWIN__ is defined) */ #if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)) && !defined(__CYGWIN__) # if !defined(__WIN32__) # define __WIN32__ # endif #endif /* detect Cygwin platform (only defines __UNIX__) */ #if defined(__CYGWIN__) # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* detect MAC OS X platform (defines __MACOSX__ and __UNIX__) */ #if defined(__APPLE__) || defined(MACOSX) || defined(__MACOSX__) # if !defined(__MACOSX__) # define __MACOSX__ # endif # if !defined(__UNIX__) # define __UNIX__ # endif #endif /* try to detect other Unix systems */ #if defined(__unix__) || defined (unix) || defined(__unix) || defined(_unix) # if !defined(__UNIX__) # define __UNIX__ # endif #endif //////////////////////////////////////////////////////////////////////////////// /// Compiler //////////////////////////////////////////////////////////////////////////////// /*! GCC compiler (the define below is commented out) */ #ifdef __GNUC__ // #define __GNUC__ #endif /*! Intel compiler (defines __ICC__) */ #ifdef __INTEL_COMPILER #define __ICC__ #endif /*! 
Visual C compiler */ #ifdef _MSC_VER #define __MSVC__ #endif //////////////////////////////////////////////////////////////////////////////// /// Makros //////////////////////////////////////////////////////////////////////////////// #ifdef __WIN32__ #define __dllexport extern "C" __declspec(dllexport) #define __dllimport extern "C" __declspec(dllimport) #else #define __dllexport extern "C" #define __dllimport extern "C" #endif #ifdef __MSVC__ #undef NOINLINE #define NOINLINE __declspec(noinline) #define INLINE __forceinline #define RESTRICT __restrict #define THREAD __declspec(thread) #define ALIGNED(...) __declspec(align(__VA_ARGS__)) //#define __FUNCTION__ __FUNCTION__ #define DEBUGBREAK() __debugbreak() #else #undef NOINLINE #undef INLINE #define NOINLINE __attribute__((noinline)) #define INLINE inline __attribute__((always_inline)) #define RESTRICT __restrict #define THREAD __thread #define ALIGNED(...) __attribute__((aligned(__VA_ARGS__))) #define __FUNCTION__ __PRETTY_FUNCTION__ #define DEBUGBREAK() asm ("int $3") #endif /*! Modern x86 processors */ #define CACHE_LINE 64 #define CACHE_LINE_ALIGNED ALIGNED(CACHE_LINE) #ifdef __GNUC__ #define MAYBE_UNUSED __attribute__((used)) #else #define MAYBE_UNUSED #endif #if defined(_MSC_VER) #define __builtin_expect(expr,b) expr #endif /*! Debug syntactic sugar */ #if GBE_DEBUG #define IF_DEBUG(EXPR) EXPR #else #define IF_DEBUG(EXPR) #endif /* GBE_DEBUG */ /*! Debug printing macros */ #define STRING(x) #x #define PING std::cout << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << std::endl #define PRINT(x) std::cout << STRING(x) << " = " << (x) << std::endl /*! Branch hint */ #define LIKELY(x) __builtin_expect(!!(x),1) #define UNLIKELY(x) __builtin_expect((x),0) /*! Stringify macros */ #define JOIN(X, Y) _DO_JOIN(X, Y) #define _DO_JOIN(X, Y) _DO_JOIN2(X, Y) #define _DO_JOIN2(X, Y) X##Y /*! 
Run-time assertion */ #if GBE_DEBUG #define GBE_ASSERT(EXPR) do { \ if (UNLIKELY(!(EXPR))) \ gbe::onFailedAssertion(#EXPR, __FILE__, __FUNCTION__, __LINE__); \ } while (0) #define GBE_ASSERTM(EXPR, MSG) do { \ if (UNLIKELY(!(EXPR))) \ gbe::onFailedAssertion(MSG, __FILE__, __FUNCTION__, __LINE__); \ } while (0) #else #define GBE_ASSERT(EXPR) do { } while (0) #define GBE_ASSERTM(EXPR, MSG) do { } while (0) #endif /* GBE_DEBUG */ #define NOT_IMPLEMENTED GBE_ASSERTM (false, "Not implemented") #define NOT_SUPPORTED GBE_ASSERTM (false, "Not supported") /*! Fatal error macros */ #define FATAL_IF(COND, MSG) \ do { \ if(UNLIKELY(COND)) FATAL(MSG); \ } while (0) /* Safe deletion macros */ #define GBE_SAFE_DELETE_ARRAY(x) do { if (x != NULL) GBE_DELETE_ARRAY(x); } while (0) #define GBE_SAFE_DELETE(x) do { if (x != NULL) GBE_DELETE(x); } while (0) /* Number of elements in an array */ #define ARRAY_ELEM_NUM(x) (sizeof(x) / sizeof(x[0])) /* Align X on A */ #define ALIGN(X,A) (((X) % (A)) ? ((X) + (A) - ((X) % (A))) : (X)) /*! Produce a string from the macro locatiom */ #define HERE (STRING(__LINE__) "@" __FILE__) /*! Typesafe encapusalation of a type (mostly for integers) */ #define TYPE_SAFE(SAFE, UNSAFE) \ class SAFE \ { \ public: \ INLINE SAFE(void) {} \ explicit INLINE SAFE(uint16_t unsafe) : unsafe(unsafe) {} \ INLINE operator UNSAFE (void) const { return unsafe; } \ UNSAFE value(void) const { return unsafe; } \ private: \ UNSAFE unsafe; \ }; /*! Default alignment for the platform */ #define GBE_DEFAULT_ALIGNMENT 16 /*! Useful constants */ #define KB 1024 #define MB (KB*KB) /*! 
Portable AlignOf */ template struct AlignOf { struct Helper { char x; T t; }; enum { value = offsetof(Helper, t) }; }; //gcc 4.8+ support C++11 alignof keyword #if (__GNUC__ >= 4 && __GNUC_MINOR__ >= 8) #define ALIGNOF(T) (alignof(T)) #else #define ALIGNOF(T) (AlignOf::value) #endif //////////////////////////////////////////////////////////////////////////////// /// Visibility parameters (DLL export and so on) //////////////////////////////////////////////////////////////////////////////// #if defined __WIN32__ #if defined __GNUC__ #define GBE_EXPORT_SYMBOL __attribute__ ((dllexport)) #define GBE_IMPORT_SYMBOL __attribute__ ((dllimport)) #else #define GBE_IMPORT_SYMBOL __declspec(dllimport) #define GBE_EXPORT_SYMBOL __declspec(dllexport) #endif /* __GNUC__ */ #else #define GBE_EXPORT_SYMBOL __attribute__ ((visibility ("default"))) #define GBE_IMPORT_SYMBOL #endif /* __WIN32__ */ //////////////////////////////////////////////////////////////////////////////// /// Basic Types //////////////////////////////////////////////////////////////////////////////// #if defined(__MSVC__) typedef __int64_t int64_t; typedef unsigned __int64_t uint64_t; typedef __int32_t int32_t; typedef unsigned __int32_t uint32_t; typedef __int16_t int16_t; typedef unsigned __int16_t uint16_t; typedef __int8_t int8_t; typedef unsigned __int8_t uint8_t; #else #include #endif #if defined(__X86_64__) typedef int64_t index_t; #else typedef int32_t index_t; #endif /*! 
To protect some classes from being copied */ class NonCopyable { protected: INLINE NonCopyable(void) {} INLINE ~NonCopyable(void) {} private: INLINE NonCopyable(const NonCopyable&) {} INLINE NonCopyable& operator= (const NonCopyable&) {return *this;} }; #define TO_MAGIC(A, B, C, D) (A<<24 | B<<16 | C<<8 | D) class Serializable { public: INLINE Serializable(void) = default; INLINE Serializable(const Serializable&) = default; INLINE Serializable& operator= (const Serializable&) = default; virtual size_t serializeToBin(std::ostream& outs) = 0; virtual size_t deserializeFromBin(std::istream& ins) = 0; /* These two will follow LLVM's ABI. */ virtual size_t serializeToLLVM(void) { return 0;/* not implemented now. */} virtual size_t deserializeFromLLVM(void) { return 0;/* not implemented now. */} virtual void printStatus(int indent = 0, std::ostream& outs = std::cout) { } virtual ~Serializable(void) { } protected: static std::string indent_to_str(int indent) { std::string ind(indent, ' '); return ind; } }; /* Help Macro for serialization. 
*/
/* SERIALIZE_OUT(elt, out, sz): copy elt into a temporary, write sizeof(elt)
 * raw bytes of that temporary to the stream `out` and add sizeof(elt) to the
 * running byte counter `sz`.
 * DESERIALIZE_IN(elt, in, sz): read sizeof(elt) raw bytes from the stream `in`
 * directly into elt and add sizeof(elt) to `sz`.
 * Both transfer the in-memory representation as is: no endianness or padding
 * conversion is done. */
#define SERIALIZE_OUT(elt, out, sz) \
     do { \
	  auto tmp_val = elt; \
	  out.write((char *)(&tmp_val), sizeof(elt)); \
	  sz += sizeof(elt); \
     } while(0)

#define DESERIALIZE_IN(elt, in, sz) \
     do { \
	  in.read((char *)(&(elt)), sizeof(elt)); \
	  sz += sizeof(elt); \
     } while(0)

////////////////////////////////////////////////////////////////////////////////
/// Disable some compiler warnings
////////////////////////////////////////////////////////////////////////////////

#ifdef __ICC__
#pragma warning(disable:265)  // floating-point operation result is out of range
#pragma warning(disable:383)  // value copied to temporary, reference to temporary used
#pragma warning(disable:869)  // parameter was never referenced
#pragma warning(disable:981)  // operands are evaluated in unspecified order
#pragma warning(disable:1418) // external function definition with no prior declaration
#pragma warning(disable:1419) // external declaration in primary source file
#pragma warning(disable:1572) // floating-point equality and inequality comparisons are unreliable
#pragma warning(disable:1125) // virtual function override intended?
#endif /* __ICC__ */

////////////////////////////////////////////////////////////////////////////////
/// Default Includes and Functions
////////////////////////////////////////////////////////////////////////////////

#include "sys/alloc.hpp"

namespace gbe
{
  /*! selects */
  /* Function form of the ternary operator: return t when s is true, f
   * otherwise (both t and f are evaluated by the caller) */
  INLINE bool  select(bool s, bool  t , bool f) { return s ? t : f; }
  INLINE int   select(bool s, int   t,   int f) { return s ? t : f; }
  INLINE float select(bool s, float t, float f) { return s ? t : f; }

  /*! Fatal error function */
  void FATAL(const std::string&);

  /*!
Return the next power of 2 */
  /* Smallest power of two that is >= x: the highest set bit of (x - 1) is
   * smeared into all lower bits, then 1 is added. A power of two is returned
   * unchanged. For x == 0 the decrement wraps around and the result is 0; the
   * result is also 0 when x is above 2^31 (the final increment overflows). */
  INLINE uint32_t nextHighestPowerOf2(uint32_t x) {
    x--;
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    return ++x;
  }

  /* Integer base-2 logarithm, i.e. the index of the highest set bit of x.
   * Returns 0 for both x == 0 and x == 1. */
  INLINE uint32_t logi2(uint32_t x) {
    uint32_t r = 0;
    while(x >>= 1) r++;
    return r;
  }

  /* Tell whether i is a power of N by dividing it by N for as long as the
   * division is exact; 0 and 1 are both reported as powers of N.
   * NOTE(review): the template parameter list appears to be missing in this
   * copy of the file (a non-type parameter N is used in the body) -- confirm
   * against the upstream source. */
  template
  INLINE uint32_t isPowerOf(uint32_t i) {
    while (i > 1) {
      if (i%N) return false;
      i = i/N;
    }
    return true;
  }
  /* N == 2: a power of two has exactly one bit set, so clearing its lowest set
   * bit gives 0. i == 0 is also reported as a power of two. */
  template<> INLINE uint32_t isPowerOf<2>(uint32_t i) { return ((i-1)&i) == 0; }

  /*! random functions */
  /* Generic version returns T(0); the specializations below are built on
   * rand(). The float and double versions divide by RAND_MAX.
   * NOTE(review): the explicit template arguments (on the specializations and
   * on the inner random() calls) appear to be missing in this copy -- confirm
   * against the upstream source. */
  template T     random() { return T(0); }
  template<> INLINE int32_t random() { return int(rand()); }
  template<> INLINE uint32_t random() { return uint32_t(rand()); }
  template<> INLINE float  random() { return random()/float(RAND_MAX); }
  template<> INLINE double random() { return random()/double(RAND_MAX); }

  /** returns performance counter in seconds */
  double getSeconds();

} /* namespace gbe */

#endif /* __GBE_PLATFORM_HPP__ */
Release_v0.3/backend/src/sys/set.hpp000066400000000000000000000042161223142177000175060ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

/**
 * \file set.hpp
 *
 * \author Benjamin Segovia
 */

#ifndef __GBE_SET_HPP__
#define __GBE_SET_HPP__

#include "sys/platform.hpp"
#include

namespace gbe
{
  /*!
Add our custom allocator to std::set */
  /* Thin wrapper over std::set: it forwards construction to the standard
   * container, adds contains() and, through NonCopyable, forbids copies (the
   * copy constructor below is compiled out).
   * NOTE(review): the template parameter and argument lists appear to be
   * missing in this copy of the file -- confirm against the upstream source. */
  template>
  class set : public std::set>, public NonCopyable
  {
  public:
    // Typedefs
    typedef Key value_type;
    typedef Allocator allocator_type;
    typedef std::set> parent_type;
    typedef Key key_type;
    typedef Pred key_compare;

    /*! Default constructor */
    INLINE set(const key_compare &comp = key_compare(),
               const allocator_type &a = allocator_type()) :
      parent_type(comp, a) {}
    /*! Iteration constructor */
    template
    INLINE set(InputIterator first, InputIterator last,
               const key_compare &comp = key_compare(),
               const allocator_type& a = allocator_type()) :
      parent_type(first, last, comp, a) {}
#if 0
    /*! Copy constructor */
    INLINE set(const set& x) : parent_type(x) {}
#endif
    /*! Better than using find if we do not care about the iterator itself */
    INLINE bool contains(const Key &key) const {
      return this->find(key) != this->end();
    }
    GBE_CLASS(set);
  };

} /* namespace gbe */

#endif /* __GBE_SET_HPP__ */
Release_v0.3/backend/src/sys/vector.hpp000066400000000000000000000050501223142177000202120ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

/**
 * \file vector.hpp
 * \author Benjamin Segovia
 */

#ifndef __GBE_VECTOR_HPP__
#define __GBE_VECTOR_HPP__

#include "sys/platform.hpp"
#include

namespace gbe
{
  /*!
Add bound checks to the standard vector class and use the internal
   *  allocator */
  /* Only operator[] differs from the standard container: the index is checked
   * with GBE_ASSERT before the access is forwarded to std::vector.
   * NOTE(review): the template parameter and argument lists appear to be
   * missing in this copy of the file -- confirm against the upstream source. */
  template
  class vector : public std::vector>
  {
  public:
    // Typedefs
    typedef std::vector> parent_type;
    typedef Allocator allocator_type;
    typedef typename allocator_type::size_type size_type;
    typedef typename parent_type::iterator iterator;

    /*! Default constructor */
    INLINE explicit vector(const allocator_type &a = allocator_type()) :
      parent_type(a) {}
#if 0
    /*! Copy constructor */
    INLINE vector(const vector &x) : parent_type(x) {}
#endif
    /*! Repetitive sequence constructor */
    INLINE explicit vector(size_type n,
                           const T& value= T(),
                           const allocator_type &a = allocator_type()) :
      parent_type(n, value, a) {}
    /*! Iteration constructor */
    template
    INLINE vector(InputIterator first,
                  InputIterator last,
                  const allocator_type &a = allocator_type()) :
      parent_type(first, last, a) {}
    /*! Get element at position index (with a bound check) */
    T &operator[] (size_t index) {
      GBE_ASSERT(index < this->size());
      return parent_type::operator[] (index);
    }
    /*! Get element at position index (with a bound check) */
    const T &operator[] (size_t index) const {
      GBE_ASSERT(index < this->size());
      return parent_type::operator[] (index);
    }
    GBE_CLASS(vector);
  };
} /* namespace gbe */

#endif /* __GBE_VECTOR_HPP__ */
Release_v0.3/backend/src/update.sh000077500000000000000000000000611223142177000171770ustar00rootroot00000000000000#! /bin/sh -e
# Run both header update scripts; -e stops at the first failure.
./update_as.sh
./update_convert.sh
Release_v0.3/backend/src/update_as.sh000077500000000000000000000003021223142177000176600ustar00rootroot00000000000000#! /bin/sh -e

AS_HEADER=ocl_as.h

# From here on stdout goes to a temporary file, so that the final header is
# only replaced once the generation has succeeded.
exec >$AS_HEADER.tmp
echo "// This file is autogenerated by gen_as.sh."
echo "// Don't modify it manually."
./gen_as.sh exec >&2 mv $AS_HEADER.tmp $AS_HEADER Release_v0.3/backend/src/update_blob_ocl_header.py000077500000000000000000000041301223142177000223610ustar00rootroot00000000000000#!/usr/bin/env python # # Copyright (C) 2012 Intel Corporation # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library. If not, see . # # Author: Zhigang Gong #/ import sys import os if len(sys.argv) != 3: print "Invalid argument {}".format(sys.argv) print "use {} tmpl_file_name output_file_name".format(sys.argv[0]) raise def safeUnlink(filename): try: os.remove(filename) except OSError: pass header_segments = [ "vector", "as", "convert", "common_defines"] blobFileName = sys.argv[2] blobTempName = sys.argv[2] + '.tmp' safeUnlink(blobFileName) tmplFile = open(sys.argv[1], 'r') blob = open(sys.argv[2] + '.tmp', 'w') path = os.path.dirname(sys.argv[1]) if path == '': path = '.' matched_header = "" for tline in tmplFile: if matched_header == "": blob.write(tline) for header in header_segments: if tline.strip() == '// ##BEGIN_{}##'.format(header.upper()) : hFile = open(path + '/ocl_' + header + '.h', 'r') lineNr = 0 for hline in hFile: if lineNr >= 2: #ignore the 2 lines of comment at the top of file. 
blob.write(hline) lineNr += 1 hFile.close() matched_header = header else: if tline.strip() == '// ##END_{}##'.format(matched_header.upper()) : blob.write(tline) matched_header = ""; tmplFile.close() blob.close() os.rename(blobTempName, blobFileName) Release_v0.3/backend/src/update_convert.sh000077500000000000000000000003461223142177000207450ustar00rootroot00000000000000#! /bin/sh -e CONVERT_HEADER=ocl_convert.h exec >$CONVERT_HEADER.tmp echo "// This file is autogenerated by gen_convert.sh." echo "// Don't modify it manually." ./gen_convert.sh exec >&2 mv $CONVERT_HEADER.tmp $CONVERT_HEADER Release_v0.3/docs/000077500000000000000000000000001223142177000141335ustar00rootroot00000000000000Release_v0.3/docs/Beignet.mdwn000066400000000000000000000114761223142177000164100ustar00rootroot00000000000000Beignet ======= Beignet is an open source implementaion of the OpenCL specification - a generic compute oriented API. This code base contains the code to run OpenCL programs on Intel GPUs which bsically defines and implements the OpenCL host functions required to initialize the device, create the command queues, the kernels and the programs and run them on the GPU. The code base also contains the compiler part of the stack which is included in `backend/`. For more specific information about the compiler, please refer to `backend/README.md` How to build ------------ The project uses CMake with three profiles: 1. Debug (-g) 2. RelWithDebInfo (-g with optimizations) 3. Release (only optimizations) Basically, from the root directory of the project `> mkdir build` `> cd build` `> cmake ../ # to configure` Choose whatever you want for the build. Then press 'c' to configure and 'g' to generate the code. 
`> make` The project depends on several external libraries: - Several X components (XLib, Xfixes, Xext) - libdrm libraries (libdrm and libdrm\_intel) - Various LLVM components - The compiler backend itself (libgbe) - Mesa git master version built with gbm enabled to support extension cl\_khr\_gl\_sharing. CMake will check the dependencies and will complain if it does not find them. The cmake will also build the backend project. Please refer to: [[OpenCL Gen Backend|Beignet/Backend]] to get more dependencies. Once built, the run-time produces a shared object libcl.so which basically directly implements the OpenCL API. A set of tests are also produced. They may be found in `utests/`. Note that the compiler depends on LLVM (Low-Level Virtual Machine project). Right now, the code has been compiled with LLVM 3.1/3.2. It will not compile with any thing older. [http://llvm.org/releases/](http://llvm.org/releases/) LLVM 3.1,3.2,3.3 and 3.4 are supported. Also note that the code was compiled on GCC 4.6 and GCC 4.7. Since the code uses really recent C++11 features, you may expect problems with older compilers. Last time I tried, the code breaks ICC 12 and Clang with internal compiler errors while compiling anonymous nested lambda functions. How to run ---------- Apart from the OpenCL library itself that can be used by any OpenCL application, this code also produces various tests to ensure the compiler and the run-time consistency. This small test framework uses a simple c++ registration system to register all the unit tests. You need to set the variable `OCL_KERNEL_PATH` to locate the OCL kernels. They are with the run-time in `./kernels`. Then in `utests/`: `> ./utest_run` will run all the unit tests one after the others `> ./utest_run some_unit_test0 some_unit_test1` will only run `some_unit_test0` and `some_unit_test1` tests Supported Hardware ------------------ The code was tested on IVB GT2 with ubuntu and fedora core distribution. 
Currently, only IVB is supported. Actually, the code was only run on IVB GT2. You may expect some issues with IVB GT1. TODO ---- The run-time is far from being complete. Most of the pieces have been put together to test and develop the OpenCL compiler. A partial list of things to do: - Complete cl\_khr\_gl\_sharing support. Some API implementations are still missing, such as clCreateFromGLBuffer, clCreateFromGLRenderbuffer, clGetGLObjectInfo... Currently, the working APIs are clCreateFromGLTexture and clCreateFromGLTexture2D. - Check that NDRangeKernels can be pushed into _different_ queues from several threads. - No state tracking at all. One batch buffer is created at each "draw call" (i.e. for each NDRangeKernels). This is really inefficient since some expensive pipe controls are issued for each batch buffer - Valgrind reports some leaks in libdrm. It sounds like a false positive but it has to be checked. The same goes for LLVM. There is one leak here to check. More generally, everything in the run-time that triggers the "FATAL" macro means that something that must be supported is not implemented properly (either it does not comply with the standard or it is just missing) Project repository ------------------ Right now, we host our project on fdo at: git://anongit.freedesktop.org/beignet. The team -------- This project was created by Ben Segovia when he was working for Intel. Now a team in the China OTC graphics department continues to work on this project. The official contact for this project is: Zou Nanhai (). How to contribute ----------------- You are always welcome to contribute to this project; you just need to subscribe to the beignet mail list and send patches to it for review. 
The official mail list is as below: [http://lists.freedesktop.org/mailman/listinfo/beignet](http://lists.freedesktop.org/mailman/listinfo/beignet) The wiki url is as below: [http://www.freedesktop.org/wiki/Software/Beignet/](http://www.freedesktop.org/wiki/Software/Beignet/) Release_v0.3/docs/Beignet/000077500000000000000000000000001223142177000155105ustar00rootroot00000000000000Release_v0.3/docs/Beignet/Backend.mdwn000066400000000000000000000037141223142177000177330ustar00rootroot00000000000000Beignet Compiler ================ This code base contains the compiler part of the Beignet OpenCL stack. The compiler is responsible to take a OpenCL language string and to compile it into a binary that can be executed on Intel integrated GPUs. Limitations ----------- Today, the compiler is far from complete. See [[here|Backend/TODO]] for a (incomplete) lists of things to do. Interface with the run-time --------------------------- Even if the compiler makes a very liberal use of C++ (templates, variadic templates, macros), we really tried hard to make a very simple interface with the run-time. The interface is therefore a pure C99 interface and it is defined in `src/backend/program.h`. The goal is to hide the complexity of the inner data structures and to enable simple run-time implementation using straightforward C99. Note that the data structures are fully opaque: this allows us to use both the C++ simulator or the real Gen program in a relatively non-intrusive way. Various environment variables ----------------------------- Environment variables are used all over the code. Most important ones are: - `OCL_SIMD_WIDTH` `(8 or 16)`. Change the number of lanes per hardware thread - `OCL_OUTPUT_GEN_IR` `(0 or 1)`. Output Gen IR (scalar intermediate representation) code - `OCL_OUTPUT_LLVM` `(0 or 1)`. Output LLVM code after the lowering passes - `OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS` `(0 or 1)`. Output LLVM code before the lowering passes - `OCL_OUTPUT_ASM` `(0 or 1)`. 
Output Gen ISA - `OCL_OUTPUT_REG_ALLOC` `(0 or 1)`. Output Gen register allocations Implementation details ---------------------- Several key decisions may use the hardware in an unusual way. See the following documents for the technical details about the compiler implementation: - [[Flat address space|flat_address_space]] - [[Unstructured branches|unstructured_branches]] - [[Scalar intermediate representation|gen_ir]] - [[Clean backend implementation|compiler_backend]] Ben Segovia. Release_v0.3/docs/Beignet/Backend/000077500000000000000000000000001223142177000170375ustar00rootroot00000000000000Release_v0.3/docs/Beignet/Backend/TODO.mdwn000066400000000000000000000063171223142177000205020ustar00rootroot00000000000000TODO ==== The compiler is far from complete. Even if the skeleton is now done and should be solid, there are a _lot_ of things to do from trivial to complex. OpenCL standard library ----------------------- Today we define the OpenCL API in header file `src/ocl_stdlib.h`. This file is far from being complete. By the way, one question remains: do we want to implement the high-precision functions as _inline_ functions or as external functions to call? Indeed, inlining all functions may lead to severe code bloat while calling functions will require implementing a proper ABI. We certainly want to do both actually. LLVM front-end -------------- The code is defined in `src/llvm`. We used the PTX ABI and the OpenCL profile to compile the code. Therefore, a good part of the job is already done. However, many things must be implemented: - Lowering down of various intrinsics like `llvm.memcpy` - Better resolving of the PHI functions. Today, we always generate MOV instructions at the end of each basic block. They can be easily optimized. - From LLVM 3.3, we use SPIR IR. We need to use the compiler defined type to represent sampler_t/image2d_t/image1d_t/.... Gen IR ------ The code is defined in `src/ir`. 
Main things to do are: - Finishing the handling of function arguments (see the [[IR description|gen_ir]] for more details) - Adding support for linking IR units together. OpenCL indeed allows creating programs from several sources - Uniform analysis. This is a major performance improvement. A "uniform" value is basically a value where regardless of the control flow, all the activated lanes will be identical. Trivial examples are immediate values, function arguments. Also, operations on uniform values will produce uniform values and so on... - Merging of independent uniform loads (and samples). This is a major performance improvement once the uniform analysis is done. Basically, several uniform loads may be collapsed into one load if no write happens in-between. This will obviously impact both instruction selection and the register allocation. Backend ------- The code is defined in `src/backend`. Main things to do are: - Optimize register spilling (see the [[compiler backend description|compiler_backend]] for more details) - Implementing proper instruction selection. A "simple" tree matching algorithm should provide good results for Gen - Improving the instruction scheduling pass General plumbing ---------------- I tried to keep the code clean, well, as far as C++ can be really clean. There are some header cleaning steps required though, in particular in the backend code. The context used in the IR code generation (see `src/ir/context.*pp`) should be split up and cleaned up too. I also purely and simply copied and pasted the Gen ISA disassembler from Mesa. This leads to code duplication. Also some messages used by OpenCL (untyped reads and writes) are not properly decoded yet. All the code that should be improved and cleaned up is tracked with "XXX" comments in the code. Parts of the code leak memory when exceptions are used. There are some pointers to track and replace with std::unique_ptr. Note that we also add a custom memory debugger that nicely complements (i.e. 
it is fast) Valgrind. Release_v0.3/docs/Beignet/Backend/compiler_backend.mdwn000066400000000000000000000114121223142177000232060ustar00rootroot00000000000000Compiler Back End ================= Well, the complete code base is somehow a compiler backend for LLVM. Here, we really speak about the final code generation passes that you may find in `src/backend`. As explained in [[the scalar IR presentation|gen_ir]], we bet on a very simple scalar IR to make it easy to parse and modify. The idea is to fix the unrelated problem (very Gen specific) where we can i.e. when the code is generated. The code generation in the compiler backend is classically divided into four steps - Instruction selection (defined in `src/backend/gen_insn_selection.*pp`). We expose an interface for the instruction selection engine. We implemented a very simple selection (called `SimpleSelection`) that does a quick and dirty one-to-many instruction generation. - Register allocation (defined in `src/backend/gen_reg_allocation.*pp`). The code implements a linear scan allocator on the code selected in the previous pass. See below for more details about register vector allocations. - Instruction scheduling. This one is not done yet. We just output the same instruction order as the program order. Note that we plan to implement an adaptive scheduling between register allocation and instruction selection (to avoid spilling as much as possible) - Instruction encoding. This is the final step that encodes the program into Gen ISA. Instruction selection --------------------- Usually, the instruction selection consists in mapping `p` instructions to `q` ISA instructions under a cost driven model. Each basic block is therefore _tiled_ into some numbers of groups of ISA instructions such that the final cost is minimized. The literature is particularly dense on the subject. 
Compilers usually use today either tree matching methods or selection DAG techniques (as LLVM backends do). The instruction selection is still a work in progress in our compiler and we only implement the most stupid (and inefficient) technique: we simply generate as many instructions as we need for each _individual_ IR instruction. Since we do not support immediate sources, this in particular leads to really ugly looking code such as `mov (16) r2:f 1.f`. It is still a work in progress. Other than that, the instruction selection is really a book keeping structure. We basically output `SelectionInstruction` objects which are the 1-to-1 mapping of Gen ISA encoding functions defined in `src/backend/gen_encoder.*pp`. However, the `SelectionInstruction` still use unallocated virtual registers and do *not* use vectors but simply tuples of virtual registers. Register allocation ------------------- The register allocation actually consists in two steps: 1. Handling the vectors for all the instructions that require them 2. Performing the register allocation itself Step 1 consists in scanning all the vectors required by sends. Obviously, the same register may be used in different vectors and that may lead to interferences. We simply sort the vectors from the largest to the smallest and allocate them in that order. As an optimization we also identify sub-vectors i.e. vectors included in larger ones and do not allocate them. The code may be largely improved in particular if we take into account liveness interferences as well. Basically, a register may be part of several vectors if the registers that are not in both vectors at the same location are not alive at the same time. This is still a work in progress. Code is right now handled by method `GenRegAllocator::allocateVector`. Step 2 performs the register allocation i.e. it associates each virtual register to one (or several) physical registers. The first thing is that the Gen register file is very flexible i.e. 
it can (almost) be freely partitioned. To handle this peculiarity, we simply implemented a free list based generic memory allocator as done with `RegisterFilePartitioner` in `src/backend/context.cpp`. We then simply implemented a linear scan allocator (see `gen_reg_allocation.cpp`). The spilling is not implemented and is still a work in progress. The thing is that spilling must be specifically handled with Gen. Indeed: 1. Bad point. Spilling is expensive and requires assembling messages for it 2. Good point. Gen is able to spill up to 256 _contiguous_ bytes in one message. This must be used for high performance spilling and this may require properly reordering the registers to spill. Instruction scheduling ---------------------- Intra-basic block instruction scheduling is relatively simple. It is not implemented yet. Instruction encoding -------------------- This is mostly done in `src/backend/gen_context.cpp` and `src/backend/gen_encoder.*pp`. This is mostly glue code and it is pretty straightforward. We just forward the selection code using the physically allocated registers. There is nothing special here. Just boilerplate. Release_v0.3/docs/Beignet/Backend/flat_address_space.mdwn000066400000000000000000000071211223142177000235350ustar00rootroot00000000000000Flat Address Space ================== Segmented address space... -------------------------- The first challenge with OpenCL is its very liberal use of pointers. The memory is segmented into several address spaces: - private. This is the memory for each work item - global. These are buffers in memory shared by all work items and work groups - constant. These are constant buffers in memory shared by all work items and work groups as well - local. This is a memory shared by all work items in the *same* work group ... But with no restriction inside each address space ----------------------------------------------------- The challenge is that there is no restriction in OpenCL inside each address space i.e. 
the full C semantic applies in particular regarding pointer arithmetic. Therefore the following code is valid: \_\_kernel void example(\_\_global int *dst, \_\_global int *src0, \_\_global int *src1)
{
  \_\_global int *from;
  if (get\_global\_id(0) % 2)
    from = src0;
  else
    from = src1;
  dst[get\_global\_id(0)] = from[get\_global\_id(0)];
}
As one may see, the load done in the last line actually mixes pointers from both sources src0 and src1. This typically makes the use of binding table indices pretty hard. If we use binding table 0 for dst, 1 for src0 and 2 for src1 (for example), we are not able to express the load in the last line with one send only. No support for stateless in required messages --------------------------------------------- Furthermore, in IVB, we are going to use four types of messages to implement the loads and the stores - Byte scattered reads. They are used to read bytes/shorts/integers that are not aligned on 4 bytes. This is a gather message i.e. the user provides up to 16 addresses - Byte scattered writes. They are used to write bytes/shorts/integers that are not aligned on 4 bytes. This is a scatter message i.e. the user provides up to 16 addresses - Untyped reads. They allow reading from 1 to 4 double words (i.e. 4 bytes each) per lane. This is also a gather message i.e. up to 16 addresses are provided per message. - Untyped writes. They are the counterpart of the untyped reads The problem is that IVB does not support stateless accesses for these messages. So surfaces are required. Secondly, stateless messages are not that interesting since all of them require a header which is still slow to assemble. Implemented solution -------------------- The solution is actually quite simple. Even with no stateless support, it is actually possible to simulate it with a surface. As one may see in the run-time code in `intel/intel_gpgpu.c`, we simply create a surface: - 2GB big - Which starts at offset 0 Surprisingly, this surface can actually map the complete GTT address space which is 2GB big. One may look at `flat_address_space` unit test in the run-time code that creates and copies buffers in such a way that the complete GTT address space is traversed. This solution brings a pretty simple implementation on the compiler side. Basically, there is nothing to do when translating from LLVM to Gen ISA. 
A pointer to `__global` or `__constant` memory is simply a 32-bit offset in that surface. Related problems ---------------- There is one drawback to this approach. Since we use a 2GB surface that maps the complete GTT space, there is no protection at all. Each write can therefore potentially modify any buffer including the command buffer, the frame buffer or the kernel code. There is *no* protection at all in the hardware to prevent that. Release_v0.3/docs/Beignet/Backend/gen_ir.mdwn000066400000000000000000000245371223142177000212040ustar00rootroot00000000000000Scalar Intermediate Representation ================================== The IR code is included in `src/ir/` of the compiler code base. The IR as designed in this compiler is the fruit of a long reflection I mostly had with Thomas Raoux. Note I usually call it "Gen IR". Scalar vs vector IR ------------------- This is actually the major question: do we need a vector IR or a scalar IR? On the LLVM side, we have both. LLVM IR can manipulate vectors and scalars (and even generalized values but we can ignore it for now). For that reason, the Clang front-end generates both scalar and vector code. Typically, a `uint4` variable will output a vector of 4 integers. Arithmetic computations will be directly done on vector variables. On the HW side, the situation is completely different: - We are going to use the parallel mode (align1) i.e. the struct-of-array mode for the EU. This is a SIMD scalar mode. - The only source of vectors we are going to have is on the send instructions (and marginally for some other instructions like the div_rem math instruction) One may therefore argue that we need vector instructions to handle the sends. Send will indeed require both vector destinations and sources. This may be a strong argument *for* vectors in the IR. However, the situation is not that good. Indeed, if we look carefully at the send instructions we see that they will require vectors that are *not* vectors in LLVM IR. 
This code for example: __global uint4 *src;
uint4 x = src[get\_global\_id(0)];
will be translated into an untyped write in the Gen ISA. Unfortunately, the address and the values to write are in the *same* vector. However, LLVM IR will output a store like: `store(%addr, %value)` which basically uses one scalar (the address) and one value (the vector to write). Therefore even if we handle vectors in the IR, that will not directly solve the problem we have at the end for the send instructions. We therefore decided to go the other direction: - We have a purely scalar IR - To replace vectors, we simply use multiple sources and destinations - Real vectors required by send instructions are handled at the very bottom of the stack in the register allocation passes. This leads to a very simple intermediate representation which is mostly a pure scalar RISC machine. Very limited IR --------------- The other major question, in particular when you look at similar stacks like NVidia PTX, is: do we need to encode in the IR register modifiers (abs, negate...) and immediate registers (like in add.f x y 1.0)? Contrary to other IRs (PTX and even LLVM that both support immediates), we also chose to have a very simple IR, much simpler than the final ISA, and to merge back what we need at the instruction selection pass. Since we need instruction selection, let us keep the IR simple. Also, there are a lot of major issues that cannot be covered in the IR and must be specifically handled at the very end of the code: - send vectors (see previous section) - send headers (value and register allocation) which are also part of the vector problem - SIMD8 mode in SIMD16 code. Some send messages do not support SIMD16 encoding and require SIMD8. Typical examples are typed writes i.e. scatters to textures. Also, this cannot be encoded in some way in a regular scalar IR. For these reasons, most of the problems directly related to Gen naturally find their solutions in either the instruction selection or the register allocator. 
This leads to the following strategy: - Keep the IR very simple and limited - Use all the analysis tools you need in the IR before the final code generation to build any information you need. This is pure "book-keeping". - Use any previous analysis and finish the job at the very end This classical approach limits the complexity in the IR while forcing us to write the proper tools in the final stages. Why not using LLVM IR directly? ------------------------------- We hesitated a long time between writing a dedicated IR (as we did) and just using LLVM IR. Indeed, LLVM comes with a large set of tools that are part of "LLVM backends". LLVM provides a lot of tools to perform the instruction selection (`SelectionDAG`) and the register allocation. Two things however prevent us from choosing this path: - We only have a limited experience with LLVM and no experience at all with the LLVM backends - LLVM register allocators do not handle at all the peculiarities of Gen: * flexible register file. Gen registers are more like memory than registers and can be freely allocated and aliased. LLVM register allocators only support partial aliasing like x86 machines do (rax -> eax -> ax) * no proper tools to handle vectors in the register allocator as we need for sends Since we will need to do some significant work anyway, this leads us to choose a more hard-coded path with an in-house IR. Note that this will not prevent us from later implementing an LLVM backend "by the book" as Nvidia does today with PTX (using an LLVM backend to do the LLVM IR -> PTX conversion). SSA or no SSA ------------- Since we have a purely scalar IR, implementing an SSA transformation on the IR may be convenient. However, most of the literature about compiler back-ends uses a non-SSA representation of the code. Since the primary goal is to write a compiler _back-end_ (instruction selection, register allocation and instruction scheduling), we keep the code in non-SSA form, leaving the higher level optimizations to LLVM.
Types, registers, instructions, functions and units --------------------------------------------------- The IR is organized as follows: - Types (defined in `src/ir/type.*pp`). These are scalar types only. Since the code is completely lowered down, there is no more reference to structures, pointers or vectors. Everything is scalar values and when "vectors" or "structures" would be needed, we use instead multiple scalar sources or destinations. - Registers (defined in `src/ir/register.*pp`). They are untyped (since Gen registers are untyped) and we have 65,535 of them per function - Instructions (defined in `src/ir/instruction.*pp`). They are typed (to distinguish integer and FP adds for example) and possibly support multiple destinations and sources. We also provide a convenient framework to introspect the instruction in a simple (and memory efficient) way - Functions (defined in `src/ir/function.*pp`). They are basically the counterpart of LLVM functions or OpenCL kernels. Note that function arguments are a problem. We actually use the PTX ABI. Everything smaller than the machine word size (i.e. 32 bits for Gen) is passed by value with a register. Everything bigger than that is passed by pointer with a ByVal attribute. Note that this requires some special treatment in the IR (see below) to make the code faster by replacing function argument loads by "pushed constants". We also defined one "register file" per function i.e. the registers are defined relative to the function that uses them. Each function is made of basic blocks i.e. sequences of instructions that are executed linearly. - Units (defined in `src/ir/unit.*pp`). Units are just a collection of functions and constants (not supported yet). Function arguments and pushed constants --------------------------------------- Gen can push values into the register file i.e. some registers are preset when the kernel starts to run.
As detailed previously, the PTX ABI is convenient since every argument is either one register or one pointer to load from or to store to. However, when a pointer is used for an argument, loads are issued which may be avoided by using constant pushes. Once again OCL makes the task a bit harder than expected. Indeed, the C semantic once again applies to function arguments as well. Look at these three examples: ### Case 1. Direct loads -> constant push can be used struct foo { int x; int y; };
\_\_kernel void case1(\_\_global int *dst, struct foo bar)
{
  dst[get\_global\_id(0)] = bar.x + bar.y;
}
We use a _direct_ _load_ for `bar` with `bar.x` and `bar.y`. Values can be pushed into registers and we can replace the loads by register reads. ### Case 2. Indirect loads -> we need to load the values from memory struct foo { int x[16]; };
\_\_kernel void case2(\_\_global int *dst, struct foo bar)
{
  dst[get\_global\_id(0)] = bar.x[get\_local\_id(0)];
}
We use an indirect load with `bar.x[get\_local\_id(0)]`. Here we need to issue a load from memory (well, actually, we could do a gather from registers, but it is not supported yet). ### Case 3. Writes to arguments -> we need to spill the values to memory first struct foo { int x[16]; };
\_\_kernel void case3(\_\_global int *dst, struct foo bar)
{
bar.x[0] = get\_global\_id(1);
  dst[get\_global\_id(0)] = bar.x[get\_local\_id(0)];
}
Here the values are written before being read. This causes some trouble since we are running in SIMD mode. Indeed, we only have in memory *one* instance of the function arguments. Here, *many* SIMD lanes and actually *many* hardware threads are running at the same time. This means that we can not write the data to memory. We need to allocate a private area for each SIMD lane. In that case, we need to spill back the function arguments into memory. We spill once per SIMD lane. Then, we read from this private area rather than the function arguments directly. This analysis is partially done today in `src/ir/lowering.*pp`. We identify all the cases but only the case with constant pushing is fully implemented. Actually, the two last cases are easy to implement but this requires one or two days of work. Value and liveness analysis tools --------------------------------- You may also notice that we provide a complete framework for value analysis (i.e. to figure out when a value or instruction destination is used and where the instruction sources come from). The code is in `src/ir/value.*pp`. Well, today, this code will burn a crazy amount of memory (use of std::set all over the place) but it at least provides the analysis required by many other passes. Compacting the data structures and using O(n) algorithms instead of the O(n ln(n)) ones are in the TODO list for sure :-) Finally, we also provide a liveness analysis tool which simply figures out which registers are alive at the end of each block (classically "live out" sets). Release_v0.3/docs/Beignet/Backend/unstructured_branches.mdwn000066400000000000000000000242341223142177000243470ustar00rootroot00000000000000Unstructured Branches ===================== A major challenge in making an OpenCL compiler is certainly to handle any kind of branch. Indeed LLVM does not make any distinction between structured and unstructured branches. See [here](http://llvm.org/docs/LangRef.html) for a complete description of the LLVM assembly specification.
The C branching code is simply lowered down into the following instructions: - `ret` to return from the current function - `br` that, if predicated, possibly jumps to two destinations (one for the taken branch and one for the other). - `switch` that implements the C switch/case construct. - `indirectbr` that implements a jump table - `invoke` and `resume` mostly used to handle exceptions Exceptions and jump tables are not supported in OpenCL. Switch cases can be lowered down to a sequence of if/else statements (using a divide and conquer approach a switch/case can be dispatched in log(n) complexity where n is the number of targets). This leads us to properly implement `br` and `ret` instructions. Solution 1 - Using Gen structured branches ------------------------------------------ Gen structured branches are the following instructions: `if` `else` `endif` `break` `continue` `while` `brd` `brc` Transforming the LLVM IR code into structured code results in basically reverse-engineering the LLVM code into the original C code. Unfortunately, there are several key problems: - OpenCL supports the `goto` keyword that may jump to an arbitrary location - LLVM can transform the control flow graph into any kind of form - Worse is that a reducible control flow graph can be turned into an irreducible one by the optimizer. This can lead to complicated code transformations and basic block duplication. The specification allows the compiler to abort if an irreducible control flow is detected but as an implementor, it is quite awkward to abort the compilation because the optimizer turned a reducible CFG into an irreducible one. Using structured branches is the open door to many corner cases. The thing is, there exists a pretty elegant solution that can be almost seamlessly supported by Gen. This is the solution we retained. Solution 2 - Linearizing the control flow graph ----------------------------------------------- The general problem is to map a general control flow graph to a SIMD machine.
The problem is fairly well understood today. A recent research paper actually dedicated to OpenCL-like languages which use the "SPMD" (single program multiple data) programming model presents interesting insights about how to map SIMD architectures to such languages (see [here](http://www.cdl.uni-saarland.de/papers/karrenberg_opencl.pdf)). ### Core idea - Linearizing the CFG initially consists in removing all forward branches and "replacing" them by predication. Indeed, the program will still be correct if you predicate instructions instead of using forward jumps. This is basically a control flow to data flow conversion. - Of course, removing all forward branches is inefficient. To improve that, we simply introduce "if conditions" in the head of basic blocks to know if we run the basic block. If no lane is going to be activated in the basic block, we jump to another basic block where _potentially_ some lanes are going to be reactivated. Consider the following CFG:
o-------o
|       |
|   1   |---->-----o
|       |          |
o-------o          |
    |              |
    |              |
o-------o          |
|       |          |
|   2   |---->-----------o
|       |          |     |
o-------o          |     |
    |              |     |
    |              |     |
    | o------o     |     |
    | |      |     |     |
    | v      |     |     |
o-------o    |     |     |
|       |    |     |     |
|   3   |    |     |     |
|       |    |     |     |
o-------o    |     |     |
    | |      |     |     |
    | o------o     |     |
    |              |     |
o-------o          |     |
|       |          |     |
|   4   |<---------o     |
|       |                |
o-------o                |
    |                    |
    |                    |
o-------o                |
|       |                |
|   5   |<----------------o
|       |
o-------o
Mapping it to a SIMD machine may seem challenging. Actually it is not too complicated. The problem is with the 2->5 jump. Indeed, we have to be sure that we are not missing any computation done in block 4. To do so: - Instead of jumping from block 2 to block 5, we jump from block 2 to block 4. - We implement a `JOIN` point on top of block 4. We check if any lane is going to be reactivated for the block 4. If not, we jump to block 5. This leads to the following linearized CFG:
o-------o
|       |
|   1   |---->-----o
|       |          |
o-------o          |
    |              |
    |              |
o-------o          |
|       |          |
|   2   |---->-----------o
|       |          |     |
o-------o          |     |
    |              |     |
    |              |     |
    | o--<---o     |     |
    | |      |     |     |
    | v      |     |     |
o-------o    |     |     |
|       |    |     |     |
|   3   |    ^     |     |
|       |    |     |     |
o-------o    |     |     |
    | |      |     |     |
    | o-->---o     |     |
    |              |     |
o-------o          |     |
|       |==========|=====|====O
|   4   |<---------|-----o    |
|       |<---------o          |
o-------o                     |
    |                         |
    |                         |
o-------o                     |
|       |                     |
|   5   |<====================O
|       |
o-------o
There is a new jump from block 4 to block 5. ### Implementation on Gen When using structured branches, Gen can support auto-masking i.e. based on the branches which are taken, the control flow is properly handled and masks are automatically applied on all instructions. However, there is no similar support for unstructured branches. We therefore decided to mask instructions manually and use single program flow. This is actually quite easy to do since Gen is able to predicate any branches. Now, how to evaluate the if conditions in an efficient way? The choice we made is to use *per-lane block IPs*: for each SIMD lane, we store a short (16 bits) in a regular 256 bits GPR (general purpose register). This "blockIP" register is used in the following way: At the beginning of each block, we compare the blockIP register with the ID of the block. The lane is going to be _activated_ if its blockIP is _smaller_ than the ID of the block. Otherwise, the lane is deactivated. Therefore, we build a flag register at the entry of each basic block with a single 16-wide uint16_t compare. If no lane is activated, a jump is performed to the next block where some lanes are going to be activated. Since these are regular jumps, we just use the `jmpi` instruction. With the help of predication, we can express all the different possibilities: - backward branches are always taken if _any_ of the lanes in the predicate is true. We just use `<+f0.0.anyh>` predication. - forward branches are *not* taken if some of the lanes are going to be activated in the next block. We therefore compare the blockIP with the ID of the _next_ block. If all of them are strictly greater than the ID of the next block, we jump. We therefore use the `<+f0.0.allh>` predicate in that case. - `JOIN` points are even simpler. We simply jump if none of the lanes is activated. We therefore use the `<-f0.0.anyh>` predicate. The complete encoding is done in `src/backend/gen_insn_selection.cpp`.
Forward branches are handled by `SimpleSelection::emitForwardBranch`. Backward branches are handled by `SimpleSelection::emitBackwardBranch`. Finally, since `JOIN` points are at the top of each basic block, they are handled by `SimpleSelection::emitLabelInstruction`. ### Computing `JOIN` points The last problem is to compute `JOIN` points i.e. we need to know if we need to jump at the beginning of each block and if we do, what is the target of the branch. The code is relatively straightforward and can be found in `src/backend/context.cpp`. The function is `Context::buildJIPs`.
Actually, the current implementation is not that elegant. A colleague, Thomas Raoux, has a simpler and better idea to handle it. ### Advantages and drawbacks of the method - The method has one decisive advantage: it is simple and extremely robust. It can handle any kind of CFG (reducible or not) and does not require any transformation. The use of shorts is also not random. A 16-wide compare of shorts is issued in 2 cycles (so it is twice as fast as a 16-wide 32 bits compare). - The main drawback will be performance. Even if this is not so bad, we still need more instructions than if we used structured branches. Mostly * one or two instructions for `JOIN` points * three instructions for backward and forward jumps (two more than structured branches that just require the branch instruction itself) Note that all extra instructions are 16 bits instructions (i.e. they use shorts) so they will only cost 2 cycles anyway. The last point is that Gen encoding restricts conditional modifiers and predicates to be the same in the instruction. This requires copying or recomputing the flag register for compares and selects. So one more instruction is required for these two instructions. Once again, this would require only 2 cycles. Remarks on `ret` instructions ----------------------------- Since we can handle any kind of CFG, handling the return statements is relatively straightforward. We first create one return block at the end of the program. Then we replace all other returns by an unconditional jump to this block. The CFG linearization will take care of the rest. We then simply encode the (only one) return instruction as an End-Of-Thread message (EOT). Code examples ------------- Some tests were written to assert the correctness of the CFG linearization and the code generation.
They can be found in the _run-time_ code base here: `utest/compiler_if_else.cpp` `utest/compiler_lower_return0.cpp` `utest/compiler_lower_return1.cpp` `utest/compiler_lower_return2.cpp` `utest/compiler_short_scatter.cpp` `utest/compiler_unstructured_branch0.cpp` `utest/compiler_unstructured_branch1.cpp` `utest/compiler_unstructured_branch2.cpp` `utest/compiler_unstructured_branch3.cpp` Release_v0.3/include/000077500000000000000000000000001223142177000146265ustar00rootroot00000000000000Release_v0.3/include/CL/000077500000000000000000000000001223142177000151245ustar00rootroot00000000000000Release_v0.3/include/CL/cl.h000066400000000000000000001416671223142177000157120ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
******************************************************************************/ /* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */ #ifndef __OPENCL_CL_H #define __OPENCL_CL_H #ifdef __APPLE__ #include #else #include #endif #ifdef __cplusplus extern "C" { #endif /******************************************************************************/ typedef struct _cl_platform_id * cl_platform_id; typedef struct _cl_device_id * cl_device_id; typedef struct _cl_context * cl_context; typedef struct _cl_command_queue * cl_command_queue; typedef struct _cl_mem * cl_mem; typedef struct _cl_program * cl_program; typedef struct _cl_kernel * cl_kernel; typedef struct _cl_event * cl_event; typedef struct _cl_sampler * cl_sampler; typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ typedef cl_ulong cl_bitfield; typedef cl_bitfield cl_device_type; typedef cl_uint cl_platform_info; typedef cl_uint cl_device_info; typedef cl_bitfield cl_device_fp_config; typedef cl_uint cl_device_mem_cache_type; typedef cl_uint cl_device_local_mem_type; typedef cl_bitfield cl_device_exec_capabilities; typedef cl_bitfield cl_command_queue_properties; typedef intptr_t cl_context_properties; typedef cl_uint cl_context_info; typedef cl_uint cl_command_queue_info; typedef cl_uint cl_channel_order; typedef cl_uint cl_channel_type; typedef cl_bitfield cl_mem_flags; typedef cl_uint cl_mem_object_type; typedef cl_uint cl_mem_info; typedef cl_uint cl_image_info; typedef cl_uint cl_buffer_create_type; typedef cl_uint cl_addressing_mode; typedef cl_uint cl_filter_mode; typedef cl_uint cl_sampler_info; typedef cl_bitfield cl_map_flags; typedef cl_uint cl_program_info; typedef cl_uint cl_program_build_info; typedef cl_int cl_build_status; typedef cl_uint cl_kernel_info; typedef cl_uint cl_kernel_work_group_info; typedef cl_uint cl_event_info; typedef cl_uint cl_command_type; typedef cl_uint 
cl_profiling_info; typedef struct _cl_image_format { cl_channel_order image_channel_order; cl_channel_type image_channel_data_type; } cl_image_format; typedef struct _cl_buffer_region { size_t origin; size_t size; } cl_buffer_region; /******************************************************************************/ /* Error Codes */ #define CL_SUCCESS 0 #define CL_DEVICE_NOT_FOUND -1 #define CL_DEVICE_NOT_AVAILABLE -2 #define CL_COMPILER_NOT_AVAILABLE -3 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 #define CL_OUT_OF_RESOURCES -5 #define CL_OUT_OF_HOST_MEMORY -6 #define CL_PROFILING_INFO_NOT_AVAILABLE -7 #define CL_MEM_COPY_OVERLAP -8 #define CL_IMAGE_FORMAT_MISMATCH -9 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 #define CL_BUILD_PROGRAM_FAILURE -11 #define CL_MAP_FAILURE -12 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 #define CL_INVALID_VALUE -30 #define CL_INVALID_DEVICE_TYPE -31 #define CL_INVALID_PLATFORM -32 #define CL_INVALID_DEVICE -33 #define CL_INVALID_CONTEXT -34 #define CL_INVALID_QUEUE_PROPERTIES -35 #define CL_INVALID_COMMAND_QUEUE -36 #define CL_INVALID_HOST_PTR -37 #define CL_INVALID_MEM_OBJECT -38 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 #define CL_INVALID_IMAGE_SIZE -40 #define CL_INVALID_SAMPLER -41 #define CL_INVALID_BINARY -42 #define CL_INVALID_BUILD_OPTIONS -43 #define CL_INVALID_PROGRAM -44 #define CL_INVALID_PROGRAM_EXECUTABLE -45 #define CL_INVALID_KERNEL_NAME -46 #define CL_INVALID_KERNEL_DEFINITION -47 #define CL_INVALID_KERNEL -48 #define CL_INVALID_ARG_INDEX -49 #define CL_INVALID_ARG_VALUE -50 #define CL_INVALID_ARG_SIZE -51 #define CL_INVALID_KERNEL_ARGS -52 #define CL_INVALID_WORK_DIMENSION -53 #define CL_INVALID_WORK_GROUP_SIZE -54 #define CL_INVALID_WORK_ITEM_SIZE -55 #define CL_INVALID_GLOBAL_OFFSET -56 #define CL_INVALID_EVENT_WAIT_LIST -57 #define CL_INVALID_EVENT -58 #define CL_INVALID_OPERATION -59 #define CL_INVALID_GL_OBJECT -60 #define CL_INVALID_BUFFER_SIZE -61 
#define CL_INVALID_MIP_LEVEL -62 #define CL_INVALID_GLOBAL_WORK_SIZE -63 #define CL_INVALID_PROPERTY -64 /* OpenCL Version */ #define CL_VERSION_1_0 1 #define CL_VERSION_1_1 1 /* cl_bool */ #define CL_FALSE 0 #define CL_TRUE 1 /* cl_platform_info */ #define CL_PLATFORM_PROFILE 0x0900 #define CL_PLATFORM_VERSION 0x0901 #define CL_PLATFORM_NAME 0x0902 #define CL_PLATFORM_VENDOR 0x0903 #define CL_PLATFORM_EXTENSIONS 0x0904 /* cl_device_type - bitfield */ #define CL_DEVICE_TYPE_DEFAULT (1 << 0) #define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF /* cl_device_info */ #define CL_DEVICE_TYPE 0x1000 #define CL_DEVICE_VENDOR_ID 0x1001 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C #define CL_DEVICE_ADDRESS_BITS 0x100D #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 #define CL_DEVICE_IMAGE_SUPPORT 0x1016 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 #define CL_DEVICE_MAX_SAMPLERS 0x1018 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C #define 
CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 #define CL_DEVICE_ENDIAN_LITTLE 0x1026 #define CL_DEVICE_AVAILABLE 0x1027 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A #define CL_DEVICE_NAME 0x102B #define CL_DEVICE_VENDOR 0x102C #define CL_DRIVER_VERSION 0x102D #define CL_DEVICE_PROFILE 0x102E #define CL_DEVICE_VERSION 0x102F #define CL_DEVICE_EXTENSIONS 0x1030 #define CL_DEVICE_PLATFORM 0x1031 /* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */ /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C #define CL_DEVICE_OPENCL_C_VERSION 0x103D /* cl_device_fp_config - bitfield */ #define CL_FP_DENORM (1 << 0) #define CL_FP_INF_NAN (1 << 1) #define CL_FP_ROUND_TO_NEAREST (1 << 2) #define CL_FP_ROUND_TO_ZERO (1 << 3) #define CL_FP_ROUND_TO_INF (1 << 4) #define CL_FP_FMA (1 << 5) #define CL_FP_SOFT_FLOAT (1 << 6) /* cl_device_mem_cache_type */ #define CL_NONE 0x0 #define CL_READ_ONLY_CACHE 0x1 #define CL_READ_WRITE_CACHE 0x2 /* cl_device_local_mem_type */ #define CL_LOCAL 0x1 #define CL_GLOBAL 0x2 /* cl_device_exec_capabilities - bitfield */ #define CL_EXEC_KERNEL (1 << 0) #define 
CL_EXEC_NATIVE_KERNEL (1 << 1) /* cl_command_queue_properties - bitfield */ #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) #define CL_QUEUE_PROFILING_ENABLE (1 << 1) /* cl_context_info */ #define CL_CONTEXT_REFERENCE_COUNT 0x1080 #define CL_CONTEXT_DEVICES 0x1081 #define CL_CONTEXT_PROPERTIES 0x1082 #define CL_CONTEXT_NUM_DEVICES 0x1083 /* cl_context_info + cl_context_properties */ #define CL_CONTEXT_PLATFORM 0x1084 /* cl_command_queue_info */ #define CL_QUEUE_CONTEXT 0x1090 #define CL_QUEUE_DEVICE 0x1091 #define CL_QUEUE_REFERENCE_COUNT 0x1092 #define CL_QUEUE_PROPERTIES 0x1093 /* cl_mem_flags - bitfield */ #define CL_MEM_READ_WRITE (1 << 0) #define CL_MEM_WRITE_ONLY (1 << 1) #define CL_MEM_READ_ONLY (1 << 2) #define CL_MEM_USE_HOST_PTR (1 << 3) #define CL_MEM_ALLOC_HOST_PTR (1 << 4) #define CL_MEM_COPY_HOST_PTR (1 << 5) /* cl_channel_order */ #define CL_R 0x10B0 #define CL_A 0x10B1 #define CL_RG 0x10B2 #define CL_RA 0x10B3 #define CL_RGB 0x10B4 #define CL_RGBA 0x10B5 #define CL_BGRA 0x10B6 #define CL_ARGB 0x10B7 #define CL_INTENSITY 0x10B8 #define CL_LUMINANCE 0x10B9 #define CL_Rx 0x10BA #define CL_RGx 0x10BB #define CL_RGBx 0x10BC /* cl_channel_type */ #define CL_SNORM_INT8 0x10D0 #define CL_SNORM_INT16 0x10D1 #define CL_UNORM_INT8 0x10D2 #define CL_UNORM_INT16 0x10D3 #define CL_UNORM_SHORT_565 0x10D4 #define CL_UNORM_SHORT_555 0x10D5 #define CL_UNORM_INT_101010 0x10D6 #define CL_SIGNED_INT8 0x10D7 #define CL_SIGNED_INT16 0x10D8 #define CL_SIGNED_INT32 0x10D9 #define CL_UNSIGNED_INT8 0x10DA #define CL_UNSIGNED_INT16 0x10DB #define CL_UNSIGNED_INT32 0x10DC #define CL_HALF_FLOAT 0x10DD #define CL_FLOAT 0x10DE /* cl_mem_object_type */ #define CL_MEM_OBJECT_BUFFER 0x10F0 #define CL_MEM_OBJECT_IMAGE2D 0x10F1 #define CL_MEM_OBJECT_IMAGE3D 0x10F2 /* cl_mem_info */ #define CL_MEM_TYPE 0x1100 #define CL_MEM_FLAGS 0x1101 #define CL_MEM_SIZE 0x1102 #define CL_MEM_HOST_PTR 0x1103 #define CL_MEM_MAP_COUNT 0x1104 #define CL_MEM_REFERENCE_COUNT 0x1105 #define 
CL_MEM_CONTEXT 0x1106 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 #define CL_MEM_OFFSET 0x1108 /* cl_image_info */ #define CL_IMAGE_FORMAT 0x1110 #define CL_IMAGE_ELEMENT_SIZE 0x1111 #define CL_IMAGE_ROW_PITCH 0x1112 #define CL_IMAGE_SLICE_PITCH 0x1113 #define CL_IMAGE_WIDTH 0x1114 #define CL_IMAGE_HEIGHT 0x1115 #define CL_IMAGE_DEPTH 0x1116 /* cl_addressing_mode */ #define CL_ADDRESS_NONE 0x1130 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 #define CL_ADDRESS_CLAMP 0x1132 #define CL_ADDRESS_REPEAT 0x1133 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134 /* cl_filter_mode */ #define CL_FILTER_NEAREST 0x1140 #define CL_FILTER_LINEAR 0x1141 /* cl_sampler_info */ #define CL_SAMPLER_REFERENCE_COUNT 0x1150 #define CL_SAMPLER_CONTEXT 0x1151 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152 #define CL_SAMPLER_ADDRESSING_MODE 0x1153 #define CL_SAMPLER_FILTER_MODE 0x1154 /* cl_map_flags - bitfield */ #define CL_MAP_READ (1 << 0) #define CL_MAP_WRITE (1 << 1) /* cl_program_info */ #define CL_PROGRAM_REFERENCE_COUNT 0x1160 #define CL_PROGRAM_CONTEXT 0x1161 #define CL_PROGRAM_NUM_DEVICES 0x1162 #define CL_PROGRAM_DEVICES 0x1163 #define CL_PROGRAM_SOURCE 0x1164 #define CL_PROGRAM_BINARY_SIZES 0x1165 #define CL_PROGRAM_BINARIES 0x1166 /* cl_program_build_info */ #define CL_PROGRAM_BUILD_STATUS 0x1181 #define CL_PROGRAM_BUILD_OPTIONS 0x1182 #define CL_PROGRAM_BUILD_LOG 0x1183 /* cl_build_status */ #define CL_BUILD_SUCCESS 0 #define CL_BUILD_NONE -1 #define CL_BUILD_ERROR -2 #define CL_BUILD_IN_PROGRESS -3 /* cl_kernel_info */ #define CL_KERNEL_FUNCTION_NAME 0x1190 #define CL_KERNEL_NUM_ARGS 0x1191 #define CL_KERNEL_REFERENCE_COUNT 0x1192 #define CL_KERNEL_CONTEXT 0x1193 #define CL_KERNEL_PROGRAM 0x1194 /* cl_kernel_work_group_info */ #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 /* cl_event_info */ #define 
CL_EVENT_COMMAND_QUEUE 0x11D0 #define CL_EVENT_COMMAND_TYPE 0x11D1 #define CL_EVENT_REFERENCE_COUNT 0x11D2 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 #define CL_EVENT_CONTEXT 0x11D4 /* cl_command_type */ #define CL_COMMAND_NDRANGE_KERNEL 0x11F0 #define CL_COMMAND_TASK 0x11F1 #define CL_COMMAND_NATIVE_KERNEL 0x11F2 #define CL_COMMAND_READ_BUFFER 0x11F3 #define CL_COMMAND_WRITE_BUFFER 0x11F4 #define CL_COMMAND_COPY_BUFFER 0x11F5 #define CL_COMMAND_READ_IMAGE 0x11F6 #define CL_COMMAND_WRITE_IMAGE 0x11F7 #define CL_COMMAND_COPY_IMAGE 0x11F8 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA #define CL_COMMAND_MAP_BUFFER 0x11FB #define CL_COMMAND_MAP_IMAGE 0x11FC #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD #define CL_COMMAND_MARKER 0x11FE #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 #define CL_COMMAND_READ_BUFFER_RECT 0x1201 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203 #define CL_COMMAND_USER 0x1204 /* command execution status */ #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 #define CL_QUEUED 0x3 /* cl_buffer_create_type */ #define CL_BUFFER_CREATE_TYPE_REGION 0x1220 /* cl_profiling_info */ #define CL_PROFILING_COMMAND_QUEUED 0x1280 #define CL_PROFILING_COMMAND_SUBMIT 0x1281 #define CL_PROFILING_COMMAND_START 0x1282 #define CL_PROFILING_COMMAND_END 0x1283 /********************************************************************************************************/ /* Platform API */ extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Device APIs */ 
extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Context APIs */ extern CL_API_ENTRY cl_context CL_API_CALL clCreateContext(const cl_context_properties * /* properties */, cl_uint /* num_devices */, const cl_device_id * /* devices */, void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */, cl_device_type /* device_type */, void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Command Queue APIs */ extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int 
CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS #warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1! /* * WARNING: * This API introduces mutable state into the OpenCL implementation. It has been REMOVED * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. * * Software developers previously relying on this API are instructed to set the command queue * properties when creating the queue, instead. 
*/ extern CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueueProperty(cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, cl_bool /* enable */, cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ /* Memory Object APIs */ extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBuffer(cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImage2D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImage3D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormats(cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, cl_uint /* num_entries */, cl_image_format * /* image_formats */, cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int 
CL_API_CALL clGetMemObjectInfo(cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetImageInfo(cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorCallback( cl_mem /* memobj */, void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; /* Sampler APIs */ extern CL_API_ENTRY cl_sampler CL_API_CALL clCreateSampler(cl_context /* context */, cl_bool /* normalized_coords */, cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfo(cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Program Object APIs */ extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource(cl_context /* context */, cl_uint /* count */, const char ** /* strings */, const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL 
clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfo(cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfo(cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Kernel Object APIs */ extern CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program /* program */, const char * /* kernel_name */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgram(cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel /* kernel */, cl_uint /* arg_index */, size_t /* arg_size */, const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfo(cl_kernel /* kernel */, cl_kernel_info /* 
param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Event Object APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents(cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetEventInfo(cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus(cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clSetEventCallback( cl_event /* event */, cl_int /* command_exec_callback_type */, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; /* Profiling APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfo(cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Flush and Finish APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL 
clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; /* Enqueued Commands APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, size_t /* offset */, size_t /* cb */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, const size_t * /* buffer_origin */, const size_t * /* host_origin */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, size_t /* offset */, size_t /* cb */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, const size_t * /* buffer_origin */, const size_t * /* host_origin */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBuffer(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, size_t /* 
src_offset */, size_t /* dst_offset */, size_t /* cb */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */, size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, 
cl_mem /* src_image */, cl_mem /* dst_buffer */, const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, size_t /* offset */, size_t /* cb */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, const size_t * /* global_work_offset */, const size_t * /* global_work_size */, const 
size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueTask(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel(cl_command_queue /* command_queue */, void (CL_CALLBACK *user_func)(void *), void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarker(cl_command_queue /* command_queue */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue /* command_queue */, cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; /* Extension function access * * Returns the extension function address for the given function name, * or NULL if a valid function can not be found. The client must * check to make sure the address is not NULL, before using or * calling the returned function address. */ extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_H */ Release_v0.3/include/CL/cl.hpp000066400000000000000000003344041223142177000162430ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2010 The Khronos Group Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ /*! \file * * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) * \author Benedict R. Gaster and Laurent Morichetti * * Additions and fixes from Brian Cole, March 3rd 2010. * * \version 1.1 * \date June 2010 * * Optional extension support * * cl * cl_ext_device_fission * #define USE_CL_DEVICE_FISSION */ /*! \mainpage * \section intro Introduction * For many large applications C++ is the language of choice and so it seems * reasonable to define C++ bindings for OpenCL. * * * The interface is contained with a single C++ header file \em cl.hpp and all * definitions are contained within the namespace \em cl. There is no additional * requirement to include \em cl.h and to use either the C++ or original C * bindings it is enough to simply include \em cl.hpp. 
* * The bindings themselves are lightweight and correspond closely to the * underlying C API. Using the C++ bindings introduces no additional execution * overhead. * * For detail documentation on the bindings see: * * The OpenCL C++ Wrapper API 1.1 (revision 04) * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf * * \section example Example * * The following example shows a general use case for the C++ * bindings, including support for the optional exception feature and * also the supplied vector and string classes, see following sections for * decriptions of these features. * * \code * #define __CL_ENABLE_EXCEPTIONS * * #if defined(__APPLE__) || defined(__MACOSX) * #include * #else * #include * #endif * #include * #include * #include * * const char * helloStr = "__kernel void " * "hello(void) " * "{ " * " " * "} "; * * int * main(void) * { * cl_int err = CL_SUCCESS; * try { * * std::vector platforms; * cl::Platform::get(&platforms); * if (platforms.size() == 0) { * std::cout << "Platform size 0\n"; * return -1; * } * * cl_context_properties properties[] = * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; * cl::Context context(CL_DEVICE_TYPE_CPU, properties); * * std::vector devices = context.getInfo(); * * cl::Program::Sources source(1, * std::make_pair(helloStr,strlen(helloStr))); * cl::Program program_ = cl::Program(context, source); * program_.build(devices); * * cl::Kernel kernel(program_, "hello", &err); * * cl::Event event; * cl::CommandQueue queue(context, devices[0], 0, &err); * queue.enqueueNDRangeKernel( * kernel, * cl::NullRange, * cl::NDRange(4,4), * cl::NullRange, * NULL, * &event); * * event.wait(); * } * catch (cl::Error err) { * std::cerr * << "ERROR: " * << err.what() * << "(" * << err.err() * << ")" * << std::endl; * } * * return EXIT_SUCCESS; * } * * \endcode * */ #ifndef CL_HPP_ #define CL_HPP_ #ifdef _WIN32 #include #include #if defined(USE_DX_INTEROP) #include #endif #endif // _WIN32 // #if 
defined(USE_CL_DEVICE_FISSION) #include #endif #if defined(__APPLE__) || defined(__MACOSX) #include #include #else #include #include #endif // !__APPLE__ #if !defined(CL_CALLBACK) #define CL_CALLBACK #endif //CL_CALLBACK #include #if !defined(__NO_STD_VECTOR) #include #endif #if !defined(__NO_STD_STRING) #include #endif #if defined(linux) || defined(__APPLE__) || defined(__MACOSX) # include #endif // linux #include /*! \namespace cl * * \brief The OpenCL C++ bindings are defined within this namespace. * */ namespace cl { #define __INIT_CL_EXT_FCN_PTR(name) \ if(!pfn_##name) { \ pfn_##name = (PFN_##name) \ clGetExtensionFunctionAddress(#name); \ if(!pfn_##name) { \ } \ } class Program; class Device; class Context; class CommandQueue; class Memory; #if defined(__CL_ENABLE_EXCEPTIONS) #include /*! \class Error * \brief Exception class */ class Error : public std::exception { private: cl_int err_; const char * errStr_; public: /*! Create a new CL error exception for a given error code * and corresponding message. */ Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) {} ~Error() throw() {} /*! \brief Get error string associated with exception * * \return A memory pointer to the error message string. */ virtual const char * what() const throw () { if (errStr_ == NULL) { return "empty"; } else { return errStr_; } } /*! \brief Get error code associated with exception * * \return The error code. */ const cl_int err(void) const { return err_; } }; #define __ERR_STR(x) #x #else #define __ERR_STR(x) NULL #endif // __CL_ENABLE_EXCEPTIONS //! 
\cond DOXYGEN_DETAIL #if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) #define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) #define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) #define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) #define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) #define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) #define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) #define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) #define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) #define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) #define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) #define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) #define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) #define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) #define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) #define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) #define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) #define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) #define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) #define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) #define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) #define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) #define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) #define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) #define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) #define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) #define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) #define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) #define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) #define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) #define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) #define __SET_KERNEL_ARGS_ERR 
__ERR_STR(clSetKernelArg) #define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) #define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) #define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) #define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) #define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) #define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) #define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) #define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) #define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) #define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) #define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) #define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) #define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) #define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) #define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) #define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) #define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) #define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) #define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) #define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) #define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) #define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) #define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) #define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) #define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) #define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) #define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) #define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) #define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) 
#define __FLUSH_ERR __ERR_STR(clFlush) #define __FINISH_ERR __ERR_STR(clFinish) #define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) #endif // __CL_USER_OVERRIDE_ERROR_STRINGS //! \endcond /*! \class string * \brief Simple string class, that provides a limited subset of std::string * functionality but avoids many of the issues that come with that class. */ class string { private: ::size_t size_; char * str_; public: string(void) : size_(0), str_(NULL) { } string(char * str, ::size_t size) : size_(size), str_(NULL) { str_ = new char[size_+1]; if (str_ != NULL) { memcpy(str_, str, size_ * sizeof(char)); str_[size_] = '\0'; } else { size_ = 0; } } string(char * str) : str_(NULL) { size_= ::strlen(str); str_ = new char[size_ + 1]; if (str_ != NULL) { memcpy(str_, str, (size_ + 1) * sizeof(char)); } else { size_ = 0; } } string& operator=(const string& rhs) { if (this == &rhs) { return *this; } if (rhs.size_ == 0 || rhs.str_ == NULL) { size_ = 0; str_ = NULL; } else { size_ = rhs.size_; str_ = new char[size_ + 1]; if (str_ != NULL) { memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); } else { size_ = 0; } } return *this; } string(const string& rhs) { *this = rhs; } ~string() { if (str_ != NULL) { delete[] str_; } } ::size_t size(void) const { return size_; } ::size_t length(void) const { return size(); } const char * c_str(void) const { return (str_) ? str_ : "";} }; #if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) #include typedef std::string STRING_CLASS; #elif !defined(__USE_DEV_STRING) typedef cl::string STRING_CLASS; #endif #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) #include #define VECTOR_CLASS std::vector #elif !defined(__USE_DEV_VECTOR) #define VECTOR_CLASS cl::vector #endif #if !defined(__MAX_DEFAULT_VECTOR_SIZE) #define __MAX_DEFAULT_VECTOR_SIZE 10 #endif /*! \class vector * \brief Fixed sized vector implementation that mirroring * std::vector functionality. 
*/
template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
class vector
{
private:
    T data_[N];          //!< Inline storage; all N slots are always constructed.
    unsigned int size_;  //!< Number of slots currently in use (0..N).

    /*! \brief Element-wise copy of \a rhs into this vector.
     *
     *  Uses T's assignment operator rather than memcpy so that element
     *  types with non-trivial copy semantics are copied correctly. Slots
     *  that were in use before but are not any more are reset to T().
     */
    void copyFrom_(const vector<T, N>& rhs)
    {
        for (unsigned int i = 0; i < rhs.size_; i++) {
            data_[i] = rhs.data_[i];
        }
        for (unsigned int i = rhs.size_; i < size_; i++) {
            data_[i] = T();
        }
        size_ = rhs.size_;
    }

public:
    //! Construct an empty vector.
    vector() : size_(0)
    {}

    ~vector() {}

    //! Number of elements currently stored.
    unsigned int size(void) const
    {
        return size_;
    }

    //! Remove all elements, resetting each used slot to T().
    void clear()
    {
        for (unsigned int i = 0; i < size_; i++) {
            data_[i] = T();
        }
        size_ = 0;
    }

    //! Append \a x; silently ignored when the vector already holds N elements.
    void push_back (const T& x)
    {
        if (size_ < N) {
            data_[size_] = x;
            size_++;
        }
    }

    /*! \brief Remove the last element (no-op when empty).
     *
     *  The slot is reset by assignment. The original ran the destructor
     *  explicitly on an array element that is destroyed again when the
     *  vector itself goes out of scope.
     */
    void pop_back(void)
    {
        if (size_ > 0) {
            size_--;
            data_[size_] = T();
        }
    }

    vector(const vector<T, N>& vec) : size_(0)
    {
        copyFrom_(vec);
    }

    //! Construct with \a size copies of \a val (capped at N elements).
    vector(unsigned int size, const T& val = T()) : size_(0)
    {
        for (unsigned int i = 0; i < size; i++) {
            push_back(val);
        }
    }

    vector<T, N>& operator=(const vector<T, N>& rhs)
    {
        if (this != &rhs) {
            copyFrom_(rhs);
        }
        return *this;
    }

    /*! \brief Bytewise equality of the used portion of both vectors.
     *
     *  Kept as a memcmp because T is not required to provide operator==.
     */
    bool operator==(const vector<T, N> &vec) const
    {
        if (size_ == 0 && vec.size_ == 0) {
            return true;
        }

        if (size_ != vec.size_) {
            return false;
        }

        return memcmp(&data_[0], &vec.data_[0], size_ * sizeof(T)) == 0;
    }

    //! Direct access to the underlying array.
    operator T* ()             { return data_; }
    operator const T* () const { return data_; }

    bool empty (void) const
    {
        return size_ == 0;
    }

    //! Maximum number of elements the vector can ever hold.
    unsigned int max_size (void) const
    {
        return N;
    }

    //! Number of element slots (the original returned a byte count).
    unsigned int capacity () const
    {
        return N;
    }

    //! Unchecked element access.
    T& operator[](int index)
    {
        return data_[index];
    }

    //! Unchecked element access; returns a copy.
    T operator[](int index) const
    {
        return data_[index];
    }

    //! Replace the contents with the range [start, end); excess elements are dropped.
    template <class I>
    void assign(I start, I end)
    {
        clear();
        while (start < end) {
            push_back(*start);
            start++;
        }
    }

    /*! \class iterator
     * \brief Iterator class for vectors
     *
     * The iterator keeps its own copy of the vector it was created from,
     * so it reads that snapshot rather than the live container.
     */
    class iterator
    {
    private:
        vector<T, N> vec_;
        int index_;
        bool initialized_;
    public:
        iterator(void) :
            index_(-1),
            initialized_(false)
        {
        }

        ~iterator(void) {}

        //! Iterator at the first element of \a vec (index -1 when \a vec is empty).
        static iterator begin(vector<T, N> &vec)
        {
            iterator i;

            if (!vec.empty()) {
                i.index_ = 0;
            }

            i.vec_ = vec;
            i.initialized_ = true;
            return i;
        }

        //! Iterator one past the last element of \a vec (index -1 when \a vec is empty).
        static iterator end(vector<T, N> &vec)
        {
            iterator i;

            if (!vec.empty()) {
                i.index_ = vec.size();
            }
            i.vec_ = vec;
            i.initialized_ = true;
            return i;
        }

        bool operator==(iterator i)
        {
            return ((vec_ == i.vec_) &&
                    (index_ == i.index_) &&
                    (initialized_ == i.initialized_));
        }

        bool operator!=(iterator i)
        {
            return (!(*this==i));
        }

        void operator++()
        {
            index_++;
        }

        // The int parameter of the postfix forms is the language's dummy
        // argument (always 0); the original added it to the index, so
        // `it++` and `it--` never moved the iterator.
        void operator++(int)
        {
            index_++;
        }

        void operator--()
        {
            index_--;
        }

        void operator--(int)
        {
            index_--;
        }

        //! Copy of the element at the current position (unchecked).
        T operator *()
        {
            return vec_[index_];
        }
    };

    iterator begin(void)
    {
        return iterator::begin(*this);
    }

    iterator end(void)
    {
        return iterator::end(*this);
    }

    //! First element; the vector must not be empty.
    T& front(void)
    {
        return data_[0];
    }

    //! Last element; the vector must not be empty.
    T& back(void)
    {
        return data_[size_ - 1];
    }

    const T& front(void) const
    {
        return data_[0];
    }

    const T& back(void) const
    {
        return data_[size_ - 1];
    }
};

/*!
 * \brief size_t class used to interface between C++ and
 * OpenCL C calls that require arrays of size_t values, whose
 * size is known statically.
*/ template struct size_t : public cl::vector< ::size_t, N> { }; namespace detail { // GetInfo help struct template struct GetInfoHelper { static cl_int get(Functor f, cl_uint name, T* param) { return f(name, sizeof(T), param, NULL); } }; // Specialized GetInfoHelper for VECTOR_CLASS params template struct GetInfoHelper > { static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) { ::size_t required; cl_int err = f(name, 0, NULL, &required); if (err != CL_SUCCESS) { return err; } T* value = (T*) alloca(required); err = f(name, required, value, NULL); if (err != CL_SUCCESS) { return err; } param->assign(&value[0], &value[required/sizeof(T)]); return CL_SUCCESS; } }; // Specialized for getInfo template struct GetInfoHelper > { static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) { cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); if (err != CL_SUCCESS) { return err; } return CL_SUCCESS; } }; // Specialized GetInfoHelper for STRING_CLASS params template struct GetInfoHelper { static cl_int get(Func f, cl_uint name, STRING_CLASS* param) { ::size_t required; cl_int err = f(name, 0, NULL, &required); if (err != CL_SUCCESS) { return err; } char* value = (char*) alloca(required); err = f(name, required, value, NULL); if (err != CL_SUCCESS) { return err; } *param = value; return CL_SUCCESS; } }; #define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ namespace detail { \ template \ struct GetInfoHelper \ { \ static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ { \ cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ if (err != CL_SUCCESS) { \ return err; \ } \ \ return ReferenceHandler::retain((*param)()); \ } \ }; \ } #define __PARAM_NAME_INFO_1_0(F) \ F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ \ 
F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ 
F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ \ F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ \ F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ \ F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ \ F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ F(cl_mem_info, CL_MEM_REFERENCE_COUNT, 
cl_uint) \ F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ \ F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ \ F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ \ F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ \ F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ \ F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ \ F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ \ F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ F(cl_command_queue_info, 
CL_QUEUE_PROPERTIES, cl_command_queue_properties) #if defined(CL_VERSION_1_1) #define __PARAM_NAME_INFO_1_1(F) \ F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ \ F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ \ F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ \ F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) #endif // CL_VERSION_1_1 #if defined(USE_CL_DEVICE_FISSION) #define __PARAM_NAME_DEVICE_FISSION(F) \ F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) #endif // USE_CL_DEVICE_FISSION template struct param_traits {}; #define __DECLARE_PARAM_TRAITS(token, param_name, T) \ struct token; \ template<> \ struct param_traits \ { \ enum { value = param_name }; \ typedef T param_type; \ }; __PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); #if defined(CL_VERSION_1_1) __PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); #endif // CL_VERSION_1_1 
#if defined(USE_CL_DEVICE_FISSION) __PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); #endif // USE_CL_DEVICE_FISSION #undef __DECLARE_PARAM_TRAITS // Convenience functions template inline cl_int getInfo(Func f, cl_uint name, T* param) { return GetInfoHelper::get(f, name, param); } template struct GetInfoFunctor0 { Func f_; const Arg0& arg0_; cl_int operator ()( cl_uint param, ::size_t size, void* value, ::size_t* size_ret) { return f_(arg0_, param, size, value, size_ret); } }; template struct GetInfoFunctor1 { Func f_; const Arg0& arg0_; const Arg1& arg1_; cl_int operator ()( cl_uint param, ::size_t size, void* value, ::size_t* size_ret) { return f_(arg0_, arg1_, param, size, value, size_ret); } }; template inline cl_int getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) { GetInfoFunctor0 f0 = { f, arg0 }; return GetInfoHelper, T> ::get(f0, name, param); } template inline cl_int getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) { GetInfoFunctor1 f0 = { f, arg0, arg1 }; return GetInfoHelper, T> ::get(f0, name, param); } template struct ReferenceHandler { }; template <> struct ReferenceHandler { // cl_device_id does not have retain(). static cl_int retain(cl_device_id) { return CL_INVALID_DEVICE; } // cl_device_id does not have release(). static cl_int release(cl_device_id) { return CL_INVALID_DEVICE; } }; template <> struct ReferenceHandler { // cl_platform_id does not have retain(). static cl_int retain(cl_platform_id) { return CL_INVALID_PLATFORM; } // cl_platform_id does not have release(). 
static cl_int release(cl_platform_id) { return CL_INVALID_PLATFORM; } }; template <> struct ReferenceHandler { static cl_int retain(cl_context context) { return ::clRetainContext(context); } static cl_int release(cl_context context) { return ::clReleaseContext(context); } }; template <> struct ReferenceHandler { static cl_int retain(cl_command_queue queue) { return ::clRetainCommandQueue(queue); } static cl_int release(cl_command_queue queue) { return ::clReleaseCommandQueue(queue); } }; template <> struct ReferenceHandler { static cl_int retain(cl_mem memory) { return ::clRetainMemObject(memory); } static cl_int release(cl_mem memory) { return ::clReleaseMemObject(memory); } }; template <> struct ReferenceHandler { static cl_int retain(cl_sampler sampler) { return ::clRetainSampler(sampler); } static cl_int release(cl_sampler sampler) { return ::clReleaseSampler(sampler); } }; template <> struct ReferenceHandler { static cl_int retain(cl_program program) { return ::clRetainProgram(program); } static cl_int release(cl_program program) { return ::clReleaseProgram(program); } }; template <> struct ReferenceHandler { static cl_int retain(cl_kernel kernel) { return ::clRetainKernel(kernel); } static cl_int release(cl_kernel kernel) { return ::clReleaseKernel(kernel); } }; template <> struct ReferenceHandler { static cl_int retain(cl_event event) { return ::clRetainEvent(event); } static cl_int release(cl_event event) { return ::clReleaseEvent(event); } }; template class Wrapper { public: typedef T cl_type; protected: cl_type object_; public: Wrapper() : object_(NULL) { } ~Wrapper() { if (object_ != NULL) { release(); } } Wrapper(const Wrapper& rhs) { object_ = rhs.object_; if (object_ != NULL) { retain(); } } Wrapper& operator = (const Wrapper& rhs) { if (object_ != NULL) { release(); } object_ = rhs.object_; if (object_ != NULL) { retain(); } return *this; } cl_type operator ()() const { return object_; } cl_type& operator ()() { return object_; } protected: cl_int 
retain() const { return ReferenceHandler::retain(object_); } cl_int release() const { return ReferenceHandler::release(object_); } }; #if defined(__CL_ENABLE_EXCEPTIONS) static inline cl_int errHandler ( cl_int err, const char * errStr = NULL) throw(Error) { if (err != CL_SUCCESS) { throw Error(err, errStr); } return err; } #else static inline cl_int errHandler (cl_int err, const char * errStr = NULL) { return err; } #endif // __CL_ENABLE_EXCEPTIONS } // namespace detail //! \endcond /*! \stuct ImageFormat * \brief ImageFormat interface fro cl_image_format. */ struct ImageFormat : public cl_image_format { ImageFormat(){} ImageFormat(cl_channel_order order, cl_channel_type type) { image_channel_order = order; image_channel_data_type = type; } ImageFormat& operator = (const ImageFormat& rhs) { if (this != &rhs) { this->image_channel_data_type = rhs.image_channel_data_type; this->image_channel_order = rhs.image_channel_order; } return *this; } }; /*! \class Device * \brief Device interface for cl_device_id. 
*/ class Device : public detail::Wrapper { public: Device(cl_device_id device) { object_ = device; } Device() : detail::Wrapper() { } Device(const Device& device) : detail::Wrapper(device) { } Device& operator = (const Device& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } template cl_int getInfo(cl_device_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetDeviceInfo, object_, name, param), __GET_DEVICE_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_device_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } #if defined(USE_CL_DEVICE_FISSION) cl_int createSubDevices( const cl_device_partition_property_ext * properties, VECTOR_CLASS* devices) { typedef CL_API_ENTRY cl_int ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( cl_device_id /*in_device*/, const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); cl_uint n = 0; cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_SUB_DEVICES); } cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_SUB_DEVICES); } devices->assign(&ids[0], &ids[n]); return CL_SUCCESS; } #endif }; /*! \class Platform * \brief Platform interface. 
*/ class Platform : public detail::Wrapper { public: static const Platform null(); Platform(cl_platform_id platform) { object_ = platform; } Platform() : detail::Wrapper() { } Platform(const Platform& platform) : detail::Wrapper(platform) { } Platform& operator = (const Platform& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const { return detail::errHandler( detail::getInfo(&::clGetPlatformInfo, object_, name, param), __GET_PLATFORM_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_platform_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } cl_int getDevices( cl_device_type type, VECTOR_CLASS* devices) const { cl_uint n = 0; cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); err = ::clGetDeviceIDs(object_, type, n, ids, NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } devices->assign(&ids[0], &ids[n]); return CL_SUCCESS; } #if defined(USE_DX_INTEROP) /*! \brief Get the list of available D3D10 devices. * * \param d3d_device_source. * * \param d3d_object. * * \param d3d_device_set. * * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device * values returned in devices can be used to identify a specific OpenCL * device. If \a devices argument is NULL, this argument is ignored. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully. * * The application can query specific capabilities of the OpenCL device(s) * returned by cl::getDevices. This can be used by the application to * determine which device(s) to use. 
* * \note In the case that exceptions are enabled and a return value * other than CL_SUCCESS is generated, then cl::Error exception is * generated. */ cl_int getDevices( cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, cl_d3d10_device_set_khr d3d_device_set, VECTOR_CLASS* devices) const { typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint* num_devices); static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); cl_uint n = 0; cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( object_, d3d_device_source, d3d_object, d3d_device_set, 0, NULL, &n); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); err = pfn_clGetDeviceIDsFromD3D10KHR( object_, d3d_device_source, d3d_object, d3d_device_set, n, ids, NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } devices->assign(&ids[0], &ids[n]); return CL_SUCCESS; } #endif static cl_int get( VECTOR_CLASS* platforms) { cl_uint n = 0; cl_int err = ::clGetPlatformIDs(0, NULL, &n); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); } cl_platform_id* ids = (cl_platform_id*) alloca( n * sizeof(cl_platform_id)); err = ::clGetPlatformIDs(n, ids, NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); } platforms->assign(&ids[0], &ids[n]); return CL_SUCCESS; } }; static inline cl_int UnloadCompiler() { return ::clUnloadCompiler(); } class Context : public detail::Wrapper { public: Context( const VECTOR_CLASS& devices, cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, ::size_t, void *) = NULL, void* data = NULL, 
cl_int* err = NULL) { cl_int error; object_ = ::clCreateContext( properties, (cl_uint) devices.size(), (cl_device_id*) &devices.front(), notifyFptr, data, &error); detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); if (err != NULL) { *err = error; } } Context( cl_device_type type, cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, ::size_t, void *) = NULL, void* data = NULL, cl_int* err = NULL) { cl_int error; object_ = ::clCreateContextFromType( properties, type, notifyFptr, data, &error); detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); if (err != NULL) { *err = error; } } Context() : detail::Wrapper() { } Context(const Context& context) : detail::Wrapper(context) { } Context& operator = (const Context& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } template cl_int getInfo(cl_context_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetContextInfo, object_, name, param), __GET_CONTEXT_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_context_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } cl_int getSupportedImageFormats( cl_mem_flags flags, cl_mem_object_type type, VECTOR_CLASS* formats) const { cl_uint numEntries; cl_int err = ::clGetSupportedImageFormats( object_, flags, type, 0, NULL, &numEntries); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); } ImageFormat* value = (ImageFormat*) alloca(numEntries * sizeof(ImageFormat)); err = ::clGetSupportedImageFormats( object_, flags, type, numEntries, (cl_image_format*) value, NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); } formats->assign(&value[0], &value[numEntries]); return CL_SUCCESS; } }; __GET_INFO_HELPER_WITH_RETAIN(cl::Context) 
/*! \class Event * \brief Event interface for cl_event. */ class Event : public detail::Wrapper { public: Event() : detail::Wrapper() { } Event(const Event& event) : detail::Wrapper(event) { } Event& operator = (const Event& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } template cl_int getInfo(cl_event_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetEventInfo, object_, name, param), __GET_EVENT_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_event_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } template cl_int getProfilingInfo(cl_profiling_info name, T* param) const { return detail::errHandler(detail::getInfo( &::clGetEventProfilingInfo, object_, name, param), __GET_EVENT_PROFILE_INFO_ERR); } template typename detail::param_traits::param_type getProfilingInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_profiling_info, name>::param_type param; cl_int result = getProfilingInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } cl_int wait() const { return detail::errHandler( ::clWaitForEvents(1, &object_), __WAIT_FOR_EVENTS_ERR); } #if defined(CL_VERSION_1_1) cl_int setCallback( cl_int type, void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), void * user_data = NULL) { return detail::errHandler( ::clSetEventCallback( object_, type, pfn_notify, user_data), __SET_EVENT_CALLBACK_ERR); } #endif static cl_int waitForEvents(const VECTOR_CLASS& events) { return detail::errHandler( ::clWaitForEvents( (cl_uint) events.size(), (cl_event*)&events.front()), __WAIT_FOR_EVENTS_ERR); } }; __GET_INFO_HELPER_WITH_RETAIN(cl::Event) #if defined(CL_VERSION_1_1) /*! \class UserEvent * \brief User event interface for cl_event. 
*/ class UserEvent : public Event { public: UserEvent( const Context& context, cl_int * err = NULL) { cl_int error; object_ = ::clCreateUserEvent( context(), &error); detail::errHandler(error, __CREATE_USER_EVENT_ERR); if (err != NULL) { *err = error; } } UserEvent() : Event() { } UserEvent(const UserEvent& event) : Event(event) { } UserEvent& operator = (const UserEvent& rhs) { if (this != &rhs) { Event::operator=(rhs); } return *this; } cl_int setStatus(cl_int status) { return detail::errHandler( ::clSetUserEventStatus(object_,status), __SET_USER_EVENT_STATUS_ERR); } }; #endif inline static cl_int WaitForEvents(const VECTOR_CLASS& events) { return detail::errHandler( ::clWaitForEvents( (cl_uint) events.size(), (cl_event*)&events.front()), __WAIT_FOR_EVENTS_ERR); } /*! \class Memory * \brief Memory interface for cl_mem. */ class Memory : public detail::Wrapper { public: Memory() : detail::Wrapper() { } Memory(const Memory& memory) : detail::Wrapper(memory) { } Memory& operator = (const Memory& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } template cl_int getInfo(cl_mem_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetMemObjectInfo, object_, name, param), __GET_MEM_OBJECT_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_mem_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } #if defined(CL_VERSION_1_1) cl_int setDestructorCallback( void (CL_CALLBACK * pfn_notify)(cl_mem, void *), void * user_data = NULL) { return detail::errHandler( ::clSetMemObjectDestructorCallback( object_, pfn_notify, user_data), __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); } #endif }; __GET_INFO_HELPER_WITH_RETAIN(cl::Memory) /*! \class Buffer * \brief Memory buffer interface. 
*/ class Buffer : public Memory { public: Buffer( const Context& context, cl_mem_flags flags, ::size_t size, void* host_ptr = NULL, cl_int* err = NULL) { cl_int error; object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); detail::errHandler(error, __CREATE_BUFFER_ERR); if (err != NULL) { *err = error; } } Buffer() : Memory() { } Buffer(const Buffer& buffer) : Memory(buffer) { } Buffer& operator = (const Buffer& rhs) { if (this != &rhs) { Memory::operator=(rhs); } return *this; } #if defined(CL_VERSION_1_1) Buffer createSubBuffer( cl_mem_flags flags, cl_buffer_create_type buffer_create_type, const void * buffer_create_info, cl_int * err = NULL) { Buffer result; cl_int error; result.object_ = ::clCreateSubBuffer( object_, flags, buffer_create_type, buffer_create_info, &error); detail::errHandler(error, __CREATE_SUBBUFFER_ERR); if (err != NULL) { *err = error; } return result; } #endif }; #if defined (USE_DX_INTEROP) class BufferD3D10 : public Buffer { public: typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, cl_int* errcode_ret); BufferD3D10( const Context& context, cl_mem_flags flags, ID3D10Buffer* bufobj, cl_int * err = NULL) { static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); cl_int error; object_ = pfn_clCreateFromD3D10BufferKHR( context(), flags, bufobj, &error); detail::errHandler(error, __CREATE_GL_BUFFER_ERR); if (err != NULL) { *err = error; } } BufferD3D10() : Buffer() { } BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } BufferD3D10& operator = (const BufferD3D10& rhs) { if (this != &rhs) { Buffer::operator=(rhs); } return *this; } }; #endif /*! \class BufferGL * \brief Memory buffer interface for GL interop. 
*/ class BufferGL : public Buffer { public: BufferGL( const Context& context, cl_mem_flags flags, GLuint bufobj, cl_int * err = NULL) { cl_int error; object_ = ::clCreateFromGLBuffer( context(), flags, bufobj, &error); detail::errHandler(error, __CREATE_GL_BUFFER_ERR); if (err != NULL) { *err = error; } } BufferGL() : Buffer() { } BufferGL(const BufferGL& buffer) : Buffer(buffer) { } BufferGL& operator = (const BufferGL& rhs) { if (this != &rhs) { Buffer::operator=(rhs); } return *this; } cl_int getObjectInfo( cl_gl_object_type *type, GLuint * gl_object_name) { return detail::errHandler( ::clGetGLObjectInfo(object_,type,gl_object_name), __GET_GL_OBJECT_INFO_ERR); } }; /*! \class BufferRenderGL * \brief Memory buffer interface for GL interop with renderbuffer. */ class BufferRenderGL : public Buffer { public: BufferRenderGL( const Context& context, cl_mem_flags flags, GLuint bufobj, cl_int * err = NULL) { cl_int error; object_ = ::clCreateFromGLRenderbuffer( context(), flags, bufobj, &error); detail::errHandler(error, __CREATE_GL_BUFFER_ERR); if (err != NULL) { *err = error; } } BufferRenderGL() : Buffer() { } BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } BufferRenderGL& operator = (const BufferRenderGL& rhs) { if (this != &rhs) { Buffer::operator=(rhs); } return *this; } cl_int getObjectInfo( cl_gl_object_type *type, GLuint * gl_object_name) { return detail::errHandler( ::clGetGLObjectInfo(object_,type,gl_object_name), __GET_GL_OBJECT_INFO_ERR); } }; /*! \class Image * \brief Base class interface for all images. 
*/ class Image : public Memory { protected: Image() : Memory() { } Image(const Image& image) : Memory(image) { } Image& operator = (const Image& rhs) { if (this != &rhs) { Memory::operator=(rhs); } return *this; } public: template cl_int getImageInfo(cl_image_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetImageInfo, object_, name, param), __GET_IMAGE_INFO_ERR); } template typename detail::param_traits::param_type getImageInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_image_info, name>::param_type param; cl_int result = getImageInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } }; /*! \class Image2D * \brief Image interface for 2D images. */ class Image2D : public Image { public: Image2D( const Context& context, cl_mem_flags flags, ImageFormat format, ::size_t width, ::size_t height, ::size_t row_pitch = 0, void* host_ptr = NULL, cl_int* err = NULL) { cl_int error; object_ = ::clCreateImage2D( context(), flags,&format, width, height, row_pitch, host_ptr, &error); detail::errHandler(error, __CREATE_IMAGE2D_ERR); if (err != NULL) { *err = error; } } Image2D() { } Image2D(const Image2D& image2D) : Image(image2D) { } Image2D& operator = (const Image2D& rhs) { if (this != &rhs) { Image::operator=(rhs); } return *this; } }; /*! \class Image2DGL * \brief 2D image interface for GL interop. */ class Image2DGL : public Image2D { public: Image2DGL( const Context& context, cl_mem_flags flags, GLenum target, GLint miplevel, GLuint texobj, cl_int * err = NULL) { cl_int error; object_ = ::clCreateFromGLTexture2D( context(), flags, target, miplevel, texobj, &error); detail::errHandler(error, __CREATE_GL_BUFFER_ERR); if (err != NULL) { *err = error; } } Image2DGL() : Image2D() { } Image2DGL(const Image2DGL& image) : Image2D(image) { } Image2DGL& operator = (const Image2DGL& rhs) { if (this != &rhs) { Image2D::operator=(rhs); } return *this; } }; /*! \class Image3D * \brief Image interface for 3D images. 
*/ class Image3D : public Image { public: Image3D( const Context& context, cl_mem_flags flags, ImageFormat format, ::size_t width, ::size_t height, ::size_t depth, ::size_t row_pitch = 0, ::size_t slice_pitch = 0, void* host_ptr = NULL, cl_int* err = NULL) { cl_int error; object_ = ::clCreateImage3D( context(), flags, &format, width, height, depth, row_pitch, slice_pitch, host_ptr, &error); detail::errHandler(error, __CREATE_IMAGE3D_ERR); if (err != NULL) { *err = error; } } Image3D() { } Image3D(const Image3D& image3D) : Image(image3D) { } Image3D& operator = (const Image3D& rhs) { if (this != &rhs) { Image::operator=(rhs); } return *this; } }; /*! \class Image2DGL * \brief 2D image interface for GL interop. */ class Image3DGL : public Image3D { public: Image3DGL( const Context& context, cl_mem_flags flags, GLenum target, GLint miplevel, GLuint texobj, cl_int * err = NULL) { cl_int error; object_ = ::clCreateFromGLTexture3D( context(), flags, target, miplevel, texobj, &error); detail::errHandler(error, __CREATE_GL_BUFFER_ERR); if (err != NULL) { *err = error; } } Image3DGL() : Image3D() { } Image3DGL(const Image3DGL& image) : Image3D(image) { } Image3DGL& operator = (const Image3DGL& rhs) { if (this != &rhs) { Image3D::operator=(rhs); } return *this; } }; /*! \class Sampler * \brief Sampler interface for cl_sampler. 
*/ class Sampler : public detail::Wrapper { public: Sampler() { } Sampler( const Context& context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, cl_int* err = NULL) { cl_int error; object_ = ::clCreateSampler( context(), normalized_coords, addressing_mode, filter_mode, &error); detail::errHandler(error, __CREATE_SAMPLER_ERR); if (err != NULL) { *err = error; } } Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } Sampler& operator = (const Sampler& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } template cl_int getInfo(cl_sampler_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetSamplerInfo, object_, name, param), __GET_SAMPLER_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_sampler_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } }; __GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) class Program; class CommandQueue; class Kernel; /*! \class NDRange * \brief NDRange interface */ class NDRange { private: size_t<3> sizes_; cl_uint dimensions_; public: NDRange() : dimensions_(0) { } NDRange(::size_t size0) : dimensions_(1) { sizes_.push_back(size0); } NDRange(::size_t size0, ::size_t size1) : dimensions_(2) { sizes_.push_back(size0); sizes_.push_back(size1); } NDRange(::size_t size0, ::size_t size1, ::size_t size2) : dimensions_(3) { sizes_.push_back(size0); sizes_.push_back(size1); sizes_.push_back(size2); } operator const ::size_t*() const { return (const ::size_t*) sizes_; } ::size_t dimensions() const { return dimensions_; } }; static const NDRange NullRange; /*! 
* \struct LocalSpaceArg * \brief Local address raper for use with Kernel::setArg */ struct LocalSpaceArg { ::size_t size_; }; namespace detail { template struct KernelArgumentHandler { static ::size_t size(const T&) { return sizeof(T); } static T* ptr(T& value) { return &value; } }; template <> struct KernelArgumentHandler { static ::size_t size(const LocalSpaceArg& value) { return value.size_; } static void* ptr(LocalSpaceArg&) { return NULL; } }; } //! \endcond inline LocalSpaceArg __local(::size_t size) { LocalSpaceArg ret = { size }; return ret; } class KernelFunctor; /*! \class Kernel * \brief Kernel interface that implements cl_kernel */ class Kernel : public detail::Wrapper { public: inline Kernel(const Program& program, const char* name, cl_int* err = NULL); Kernel() { } Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } Kernel& operator = (const Kernel& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } template cl_int getInfo(cl_kernel_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetKernelInfo, object_, name, param), __GET_KERNEL_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_kernel_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } template cl_int getWorkGroupInfo( const Device& device, cl_kernel_work_group_info name, T* param) const { return detail::errHandler( detail::getInfo( &::clGetKernelWorkGroupInfo, object_, device(), name, param), __GET_KERNEL_WORK_GROUP_INFO_ERR); } template typename detail::param_traits::param_type getWorkGroupInfo(const Device& device, cl_int* err = NULL) const { typename detail::param_traits< detail::cl_kernel_work_group_info, name>::param_type param; cl_int result = getWorkGroupInfo(device, name, ¶m); if (err != NULL) { *err = result; } return param; } template cl_int setArg(cl_uint index, T value) 
{ return detail::errHandler( ::clSetKernelArg( object_, index, detail::KernelArgumentHandler::size(value), detail::KernelArgumentHandler::ptr(value)), __SET_KERNEL_ARGS_ERR); } cl_int setArg(cl_uint index, ::size_t size, void* argPtr) { return detail::errHandler( ::clSetKernelArg(object_, index, size, argPtr), __SET_KERNEL_ARGS_ERR); } KernelFunctor bind( const CommandQueue& queue, const NDRange& offset, const NDRange& global, const NDRange& local); KernelFunctor bind( const CommandQueue& queue, const NDRange& global, const NDRange& local); }; __GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) /*! \class Program * \brief Program interface that implements cl_program. */ class Program : public detail::Wrapper { public: typedef VECTOR_CLASS > Binaries; typedef VECTOR_CLASS > Sources; Program( const Context& context, const Sources& sources, cl_int* err = NULL) { cl_int error; const ::size_t n = (::size_t)sources.size(); ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); const char** strings = (const char**) alloca(n * sizeof(const char*)); for (::size_t i = 0; i < n; ++i) { strings[i] = sources[(int)i].first; lengths[i] = sources[(int)i].second; } object_ = ::clCreateProgramWithSource( context(), (cl_uint)n, strings, lengths, &error); detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); if (err != NULL) { *err = error; } } Program( const Context& context, const VECTOR_CLASS& devices, const Binaries& binaries, VECTOR_CLASS* binaryStatus = NULL, cl_int* err = NULL) { cl_int error; const ::size_t n = binaries.size(); ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); for (::size_t i = 0; i < n; ++i) { images[i] = (const unsigned char*)binaries[(int)i].first; lengths[i] = binaries[(int)i].second; } object_ = ::clCreateProgramWithBinary( context(), (cl_uint) devices.size(), (cl_device_id*)&devices.front(), lengths, images, binaryStatus != NULL ? 
(cl_int*) &binaryStatus->front() : NULL, &error); detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); if (err != NULL) { *err = error; } } Program() { } Program(const Program& program) : detail::Wrapper(program) { } Program& operator = (const Program& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } cl_int build( const VECTOR_CLASS& devices, const char* options = NULL, void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, void* data = NULL) const { return detail::errHandler( ::clBuildProgram( object_, (cl_uint) devices.size(), (cl_device_id*)&devices.front(), options, notifyFptr, data), __BUILD_PROGRAM_ERR); } template cl_int getInfo(cl_program_info name, T* param) const { return detail::errHandler( detail::getInfo(&::clGetProgramInfo, object_, name, param), __GET_PROGRAM_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_program_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } template cl_int getBuildInfo( const Device& device, cl_program_build_info name, T* param) const { return detail::errHandler( detail::getInfo( &::clGetProgramBuildInfo, object_, device(), name, param), __GET_PROGRAM_BUILD_INFO_ERR); } template typename detail::param_traits::param_type getBuildInfo(const Device& device, cl_int* err = NULL) const { typename detail::param_traits< detail::cl_program_build_info, name>::param_type param; cl_int result = getBuildInfo(device, name, ¶m); if (err != NULL) { *err = result; } return param; } cl_int createKernels(VECTOR_CLASS* kernels) { cl_uint numKernels; cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); } Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); err = ::clCreateKernelsInProgram( object_, numKernels, (cl_kernel*) value, 
NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); } kernels->assign(&value[0], &value[numKernels]); return CL_SUCCESS; } }; __GET_INFO_HELPER_WITH_RETAIN(cl::Program) inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) { cl_int error; object_ = ::clCreateKernel(program(), name, &error); detail::errHandler(error, __CREATE_KERNEL_ERR); if (err != NULL) { *err = error; } } /*! \class CommandQueue * \brief CommandQueue interface for cl_command_queue. */ class CommandQueue : public detail::Wrapper { public: CommandQueue( const Context& context, const Device& device, cl_command_queue_properties properties = 0, cl_int* err = NULL) { cl_int error; object_ = ::clCreateCommandQueue( context(), device(), properties, &error); detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); if (err != NULL) { *err = error; } } CommandQueue() { } CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } CommandQueue& operator = (const CommandQueue& rhs) { if (this != &rhs) { detail::Wrapper::operator=(rhs); } return *this; } template cl_int getInfo(cl_command_queue_info name, T* param) const { return detail::errHandler( detail::getInfo( &::clGetCommandQueueInfo, object_, name, param), __GET_COMMAND_QUEUE_INFO_ERR); } template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const { typename detail::param_traits< detail::cl_command_queue_info, name>::param_type param; cl_int result = getInfo(name, ¶m); if (err != NULL) { *err = result; } return param; } cl_int enqueueReadBuffer( const Buffer& buffer, cl_bool blocking, ::size_t offset, ::size_t size, void* ptr, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueReadBuffer( object_, buffer(), blocking, offset, size, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_READ_BUFFER_ERR); } cl_int enqueueWriteBuffer( const Buffer& buffer, cl_bool blocking, ::size_t offset, ::size_t size, const void* ptr, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueWriteBuffer( object_, buffer(), blocking, offset, size, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_WRITE_BUFFER_ERR); } cl_int enqueueCopyBuffer( const Buffer& src, const Buffer& dst, ::size_t src_offset, ::size_t dst_offset, ::size_t size, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueCopyBuffer( object_, src(), dst(), src_offset, dst_offset, size, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQEUE_COPY_BUFFER_ERR); } #if defined(CL_VERSION_1_1) cl_int enqueueReadBufferRect( const Buffer& buffer, cl_bool blocking, const size_t<3>& buffer_offset, const size_t<3>& host_offset, const size_t<3>& region, ::size_t buffer_row_pitch, ::size_t buffer_slice_pitch, ::size_t host_row_pitch, ::size_t host_slice_pitch, void *ptr, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueReadBufferRect( object_, buffer(), blocking, (const ::size_t *)buffer_offset, (const ::size_t *)host_offset, (const ::size_t *)region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_READ_BUFFER_RECT_ERR); } cl_int enqueueWriteBufferRect( const Buffer& buffer, cl_bool blocking, const size_t<3>& buffer_offset, const size_t<3>& host_offset, const size_t<3>& region, ::size_t buffer_row_pitch, ::size_t buffer_slice_pitch, ::size_t host_row_pitch, ::size_t host_slice_pitch, void *ptr, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueWriteBufferRect( object_, buffer(), blocking, (const ::size_t *)buffer_offset, (const ::size_t *)host_offset, (const ::size_t *)region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_WRITE_BUFFER_RECT_ERR); } cl_int enqueueCopyBufferRect( const Buffer& src, const Buffer& dst, const size_t<3>& src_origin, const size_t<3>& dst_origin, const size_t<3>& region, ::size_t src_row_pitch, ::size_t src_slice_pitch, ::size_t dst_row_pitch, ::size_t dst_slice_pitch, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueCopyBufferRect( object_, src(), dst(), (const ::size_t *)src_origin, (const ::size_t *)dst_origin, (const ::size_t *)region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, (cl_event*) event), __ENQEUE_COPY_BUFFER_RECT_ERR); } #endif cl_int enqueueReadImage( const Image& image, cl_bool blocking, const size_t<3>& origin, const size_t<3>& region, ::size_t row_pitch, ::size_t slice_pitch, void* ptr, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueReadImage( object_, image(), blocking, (const ::size_t *) origin, (const ::size_t *) region, row_pitch, slice_pitch, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_READ_IMAGE_ERR); } cl_int enqueueWriteImage( const Image& image, cl_bool blocking, const size_t<3>& origin, const size_t<3>& region, ::size_t row_pitch, ::size_t slice_pitch, void* ptr, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueWriteImage( object_, image(), blocking, (const ::size_t *) origin, (const ::size_t *) region, row_pitch, slice_pitch, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_WRITE_IMAGE_ERR); } cl_int enqueueCopyImage( const Image& src, const Image& dst, const size_t<3>& src_origin, const size_t<3>& dst_origin, const size_t<3>& region, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueCopyImage( object_, src(), dst(), (const ::size_t *) src_origin, (const ::size_t *)dst_origin, (const ::size_t *) region, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_COPY_IMAGE_ERR); } cl_int enqueueCopyImageToBuffer( const Image& src, const Buffer& dst, const size_t<3>& src_origin, const size_t<3>& region, ::size_t dst_offset, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueCopyImageToBuffer( object_, src(), dst(), (const ::size_t *) src_origin, (const ::size_t *) region, dst_offset, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); } cl_int enqueueCopyBufferToImage( const Buffer& src, const Image& dst, ::size_t src_offset, const size_t<3>& dst_origin, const size_t<3>& region, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueCopyBufferToImage( object_, src(), dst(), src_offset, (const ::size_t *) dst_origin, (const ::size_t *) region, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); } void* enqueueMapBuffer( const Buffer& buffer, cl_bool blocking, cl_map_flags flags, ::size_t offset, ::size_t size, const VECTOR_CLASS* events = NULL, Event* event = NULL, cl_int* err = NULL) const { cl_int error; void * result = ::clEnqueueMapBuffer( object_, buffer(), blocking, flags, offset, size, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, (cl_event*) event, &error); detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); if (err != NULL) { *err = error; } return result; } void* enqueueMapImage( const Image& buffer, cl_bool blocking, cl_map_flags flags, const size_t<3>& origin, const size_t<3>& region, ::size_t * row_pitch, ::size_t * slice_pitch, const VECTOR_CLASS* events = NULL, Event* event = NULL, cl_int* err = NULL) const { cl_int error; void * result = ::clEnqueueMapImage( object_, buffer(), blocking, flags, (const ::size_t *) origin, (const ::size_t *) region, row_pitch, slice_pitch, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event, &error); detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); if (err != NULL) { *err = error; } return result; } cl_int enqueueUnmapMemObject( const Memory& memory, void* mapped_ptr, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueUnmapMemObject( object_, memory(), mapped_ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_UNMAP_MEM_OBJECT_ERR); } cl_int enqueueNDRangeKernel( const Kernel& kernel, const NDRange& offset, const NDRange& global, const NDRange& local, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueNDRangeKernel( object_, kernel(), (cl_uint) global.dimensions(), offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, (const ::size_t*) global, local.dimensions() != 0 ? (const ::size_t*) local : NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_NDRANGE_KERNEL_ERR); } cl_int enqueueTask( const Kernel& kernel, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueTask( object_, kernel(), (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_TASK_ERR); } cl_int enqueueNativeKernel( void (*userFptr)(void *), std::pair args, const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* mem_locs = NULL, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) : NULL; if (mems != NULL) { for (unsigned int i = 0; i < mem_objects->size(); i++) { mems[i] = ((*mem_objects)[i])(); } } return detail::errHandler( ::clEnqueueNativeKernel( object_, userFptr, args.first, args.second, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, mems, (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_NATIVE_KERNEL); } cl_int enqueueMarker(Event* event = NULL) const { return detail::errHandler( ::clEnqueueMarker(object_, (cl_event*) event), __ENQUEUE_MARKER_ERR); } cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const { return detail::errHandler( ::clEnqueueWaitForEvents( object_, (cl_uint) events.size(), (const cl_event*) &events.front()), __ENQUEUE_WAIT_FOR_EVENTS_ERR); } cl_int enqueueAcquireGLObjects( const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueAcquireGLObjects( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? 
(const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_ACQUIRE_GL_ERR); } cl_int enqueueReleaseGLObjects( const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { return detail::errHandler( ::clEnqueueReleaseGLObjects( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_RELEASE_GL_ERR); } #if defined (USE_DX_INTEROP) typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); cl_int enqueueAcquireD3D10Objects( const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); return detail::errHandler( pfn_clEnqueueAcquireD3D10ObjectsKHR( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL) ? 
(cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_ACQUIRE_GL_ERR); } cl_int enqueueReleaseD3D10Objects( const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); return detail::errHandler( pfn_clEnqueueReleaseD3D10ObjectsKHR( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL) ? (cl_event*) &events->front() : NULL, (cl_event*) event), __ENQUEUE_RELEASE_GL_ERR); } #endif cl_int enqueueBarrier() const { return detail::errHandler( ::clEnqueueBarrier(object_), __ENQUEUE_BARRIER_ERR); } cl_int flush() const { return detail::errHandler(::clFlush(object_), __FLUSH_ERR); } cl_int finish() const { return detail::errHandler(::clFinish(object_), __FINISH_ERR); } }; __GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) /*! \class KernelFunctor * \brief Kernel functor interface * * \note Currently only functors of zero to ten arguments are supported. It * is straightforward to add more and a more general solution, similar to * Boost.Lambda could be followed if required in the future. 
*/ class KernelFunctor { private: Kernel kernel_; CommandQueue queue_; NDRange offset_; NDRange global_; NDRange local_; cl_int err_; public: KernelFunctor() { } KernelFunctor( const Kernel& kernel, const CommandQueue& queue, const NDRange& offset, const NDRange& global, const NDRange& local) : kernel_(kernel), queue_(queue), offset_(offset), global_(global), local_(local), err_(CL_SUCCESS) {} KernelFunctor& operator=(const KernelFunctor& rhs); KernelFunctor(const KernelFunctor& rhs); cl_int getError() { return err_; } inline Event operator()(const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const 
A9& a9, const A10& a10, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const A13& a13, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const A13& a13, const A14& a14, const VECTOR_CLASS* events = NULL); template inline Event operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const A13& a13, const A14& a14, const A15& a15, const VECTOR_CLASS* events = NULL); }; inline KernelFunctor Kernel::bind( const CommandQueue& queue, const NDRange& offset, const NDRange& global, const NDRange& local) { return KernelFunctor(*this,queue,offset,global,local); } inline KernelFunctor Kernel::bind( const CommandQueue& queue, const NDRange& global, const NDRange& local) { return KernelFunctor(*this,queue,NullRange,global,local); } inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) { if (this == &rhs) { return *this; } kernel_ = rhs.kernel_; queue_ = rhs.queue_; offset_ = rhs.offset_; global_ = rhs.global_; local_ = rhs.local_; return *this; } inline 
KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : kernel_(rhs.kernel_), queue_(rhs.queue_), offset_(rhs.offset_), global_(rhs.global_), local_(rhs.local_) { } Event KernelFunctor::operator()(const VECTOR_CLASS* events) { Event event; err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? 
&event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? 
&event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); kernel_.setArg(8,a9); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); kernel_.setArg(8,a9); kernel_.setArg(9,a10); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); kernel_.setArg(8,a9); kernel_.setArg(9,a10); kernel_.setArg(10,a11); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? 
&event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); kernel_.setArg(8,a9); kernel_.setArg(9,a10); kernel_.setArg(10,a11); kernel_.setArg(11,a12); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const A13& a13, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); kernel_.setArg(8,a9); kernel_.setArg(9,a10); kernel_.setArg(10,a11); kernel_.setArg(11,a12); kernel_.setArg(12,a13); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? 
&event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const A13& a13, const A14& a14, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); kernel_.setArg(8,a9); kernel_.setArg(9,a10); kernel_.setArg(10,a11); kernel_.setArg(11,a12); kernel_.setArg(12,a13); kernel_.setArg(13,a14); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? &event); return event; } template Event KernelFunctor::operator()( const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12, const A13& a13, const A14& a14, const A15& a15, const VECTOR_CLASS* events) { Event event; kernel_.setArg(0,a1); kernel_.setArg(1,a2); kernel_.setArg(2,a3); kernel_.setArg(3,a4); kernel_.setArg(4,a5); kernel_.setArg(5,a6); kernel_.setArg(6,a7); kernel_.setArg(7,a8); kernel_.setArg(8,a9); kernel_.setArg(9,a10); kernel_.setArg(10,a11); kernel_.setArg(11,a12); kernel_.setArg(12,a13); kernel_.setArg(13,a14); kernel_.setArg(14,a15); err_ = queue_.enqueueNDRangeKernel( kernel_, offset_, global_, local_, NULL, // bgaster_fixme - do we want to allow wait event lists? 
&event); return event; } #undef __ERR_STR #if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) #undef __GET_DEVICE_INFO_ERR #undef __GET_PLATFORM_INFO_ERR #undef __GET_DEVICE_IDS_ERR #undef __GET_CONTEXT_INFO_ERR #undef __GET_EVENT_INFO_ERR #undef __GET_EVENT_PROFILE_INFO_ERR #undef __GET_MEM_OBJECT_INFO_ERR #undef __GET_IMAGE_INFO_ERR #undef __GET_SAMPLER_INFO_ERR #undef __GET_KERNEL_INFO_ERR #undef __GET_KERNEL_WORK_GROUP_INFO_ERR #undef __GET_PROGRAM_INFO_ERR #undef __GET_PROGRAM_BUILD_INFO_ERR #undef __GET_COMMAND_QUEUE_INFO_ERR #undef __CREATE_CONTEXT_FROM_TYPE_ERR #undef __GET_SUPPORTED_IMAGE_FORMATS_ERR #undef __CREATE_BUFFER_ERR #undef __CREATE_SUBBUFFER_ERR #undef __CREATE_IMAGE2D_ERR #undef __CREATE_IMAGE3D_ERR #undef __CREATE_SAMPLER_ERR #undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR #undef __CREATE_USER_EVENT_ERR #undef __SET_USER_EVENT_STATUS_ERR #undef __SET_EVENT_CALLBACK_ERR #undef __WAIT_FOR_EVENTS_ERR #undef __CREATE_KERNEL_ERR #undef __SET_KERNEL_ARGS_ERR #undef __CREATE_PROGRAM_WITH_SOURCE_ERR #undef __CREATE_PROGRAM_WITH_BINARY_ERR #undef __BUILD_PROGRAM_ERR #undef __CREATE_KERNELS_IN_PROGRAM_ERR #undef __CREATE_COMMAND_QUEUE_ERR #undef __SET_COMMAND_QUEUE_PROPERTY_ERR #undef __ENQUEUE_READ_BUFFER_ERR #undef __ENQUEUE_WRITE_BUFFER_ERR #undef __ENQUEUE_READ_BUFFER_RECT_ERR #undef __ENQUEUE_WRITE_BUFFER_RECT_ERR #undef __ENQEUE_COPY_BUFFER_ERR #undef __ENQEUE_COPY_BUFFER_RECT_ERR #undef __ENQUEUE_READ_IMAGE_ERR #undef __ENQUEUE_WRITE_IMAGE_ERR #undef __ENQUEUE_COPY_IMAGE_ERR #undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR #undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR #undef __ENQUEUE_MAP_BUFFER_ERR #undef __ENQUEUE_MAP_IMAGE_ERR #undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR #undef __ENQUEUE_NDRANGE_KERNEL_ERR #undef __ENQUEUE_TASK_ERR #undef __ENQUEUE_NATIVE_KERNEL #undef __UNLOAD_COMPILER_ERR #endif //__CL_USER_OVERRIDE_ERROR_STRINGS #undef __GET_INFO_HELPER_WITH_RETAIN // Extensions #undef __INIT_CL_EXT_FCN_PTR #undef __CREATE_SUB_DEVICES #if 
defined(USE_CL_DEVICE_FISSION) #undef __PARAM_NAME_DEVICE_FISSION #endif // USE_CL_DEVICE_FISSION } // namespace cl #endif // CL_HPP_ Release_v0.3/include/CL/cl_d3d10.h000066400000000000000000000113731223142177000165730ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
**********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_CL_D3D10_H #define __OPENCL_CL_D3D10_H #include #include #include #ifdef __cplusplus extern "C" { #endif /****************************************************************************** * cl_khr_d3d10_sharing */ #define cl_khr_d3d10_sharing 1 typedef cl_uint cl_d3d10_device_source_khr; typedef cl_uint cl_d3d10_device_set_khr; /******************************************************************************/ // Error Codes #define CL_INVALID_D3D10_DEVICE_KHR -1002 #define CL_INVALID_D3D10_RESOURCE_KHR -1003 #define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 #define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 // cl_d3d10_device_source_nv #define CL_D3D10_DEVICE_KHR 0x4010 #define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 // cl_d3d10_device_set_nv #define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 #define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 // cl_context_info #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 #define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C // cl_mem_info #define CL_MEM_D3D10_RESOURCE_KHR 0x4015 // cl_image_info #define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 // cl_command_type #define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 #define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL 
*clCreateFromD3D10Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } #endif #endif // __OPENCL_CL_D3D10_H Release_v0.3/include/CL/cl_ext.h000066400000000000000000000233371223142177000165630ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. 
* * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ /* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ /* cl_ext.h contains OpenCL extensions which don't have external */ /* (OpenGL, D3D) dependencies. */ #ifndef __CL_EXT_H #define __CL_EXT_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #include #else #include #endif /* cl_khr_fp64 extension - no extension #define since it has no functions */ #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 /* cl_khr_fp16 extension - no extension #define since it has no functions */ #define CL_DEVICE_HALF_FP_CONFIG 0x1033 /* Memory object destruction * * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR * * Registers a user callback function that will be called when the memory object is deleted and its resources * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback * stack associated with memobj. The registered user callback functions are called in the reverse order in * which they were registered. The user callback functions are called and then the memory object is deleted * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be * notified when the memory referenced by host_ptr, specified when the memory object is created and used as * the storage bits for the memory object, can be reused or freed. 
* * The application may not call CL APIs with the cl_mem object passed to the pfn_notify. * * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. */ #define cl_APPLE_SetMemObjectDestructor 1 cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* Context Logging Functions * * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. * * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger */ #define cl_APPLE_ContextLoggingFunctions 1 extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /************************ * cl_khr_icd extension * ************************/ #define cl_khr_icd 1 /* cl_platform_info */ #define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 /* Additional Error Codes */ #define CL_PLATFORM_NOT_FOUND_KHR -1001 extern CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */); typedef CL_API_ENTRY cl_int 
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */); /****************************************** * cl_nv_device_attribute_query extension * ******************************************/ /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 #define CL_DEVICE_WARP_SIZE_NV 0x4003 #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 /********************************* * cl_amd_device_attribute_query * *********************************/ #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 #ifdef CL_VERSION_1_1 /*********************************** * cl_ext_device_fission extension * ***********************************/ #define cl_ext_device_fission 1 extern CL_API_ENTRY cl_int CL_API_CALL clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef cl_ulong cl_device_partition_property_ext; extern CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevicesEXT( cl_device_id /*in_device*/, const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/, const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, 
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; /* cl_device_partition_property_ext */ #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 /* clDeviceGetInfo selectors */ #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 /* error codes */ #define CL_DEVICE_PARTITION_FAILED_EXT -1057 #define CL_INVALID_PARTITION_COUNT_EXT -1058 #define CL_INVALID_PARTITION_NAME_EXT -1059 /* CL_AFFINITY_DOMAINs */ #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 /* cl_device_partition_property_ext list terminators */ #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) #endif /* CL_VERSION_1_1 */ #ifdef __cplusplus } #endif #endif /* __CL_EXT_H */ Release_v0.3/include/CL/cl_gl.h000066400000000000000000000150731223142177000163630ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ /* * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have * OpenGL dependencies. The application is responsible for #including * OpenGL or OpenGL ES headers before #including cl_gl.h. 
*/ #ifndef __OPENCL_CL_GL_H #define __OPENCL_CL_GL_H #ifdef __APPLE__ #include #include #else #include #endif #ifdef __cplusplus extern "C" { #endif typedef cl_uint cl_gl_object_type; typedef cl_uint cl_gl_texture_info; typedef cl_uint cl_gl_platform_info; typedef struct __GLsync *cl_GLsync; /* cl_gl_object_type */ #define CL_GL_OBJECT_BUFFER 0x2000 #define CL_GL_OBJECT_TEXTURE2D 0x2001 #define CL_GL_OBJECT_TEXTURE3D 0x2002 #define CL_GL_OBJECT_RENDERBUFFER 0x2003 /* cl_gl_texture_info */ #define CL_GL_TEXTURE_TARGET 0x2004 #define CL_GL_MIPMAP_LEVEL 0x2005 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer(cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */, int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture2D(cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture3D(cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer(cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLObjectInfo(cl_mem /* memobj */, cl_gl_object_type * /* gl_object_type */, cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo(cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, cl_uint /* num_objects */, 
const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_gl_sharing extension */ #define cl_khr_gl_sharing 1 typedef cl_uint cl_gl_context_info; /* Additional Error Codes */ #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 /* cl_gl_context_info */ #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 #define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 /* Additional cl_context_properties */ #define CL_GL_CONTEXT_KHR 0x2008 #define CL_EGL_DISPLAY_KHR 0x2009 #define CL_GLX_DISPLAY_KHR 0x200A #define CL_WGL_HDC_KHR 0x200B #define CL_CGL_SHAREGROUP_KHR 0x200C extern CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR(const cl_context_properties * /* properties */, cl_gl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( const cl_context_properties * properties, cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_H */ Release_v0.3/include/CL/cl_gl_ext.h000066400000000000000000000051101223142177000172320ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ /* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ /* OpenGL dependencies. */ #ifndef __OPENCL_CL_GL_EXT_H #define __OPENCL_CL_GL_EXT_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #else #include #endif /* * For each extension, follow this template * /* cl_VEN_extname extension */ /* #define cl_VEN_extname 1 * ... define new types, if any * ... define new tokens, if any * ... define new APIs, if any * * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header * This allows us to avoid having to decide whether to include GL headers or GLES here. 
*/ /* * cl_khr_gl_event extension * See section 9.9 in the OpenCL 1.1 spec for more information */ #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR(cl_context /* context */, cl_GLsync /* cl_GLsync */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_EXT_H */ Release_v0.3/include/CL/cl_intel.h000066400000000000000000000115561223142177000170760ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __OPENCL_CL_INTEL_H #define __OPENCL_CL_INTEL_H #include "CL/cl.h" #ifdef __cplusplus extern "C" { #endif #define CL_MEM_PINNABLE (1 << 10) /* Track allocations and report current number of unfreed allocations */ extern CL_API_ENTRY cl_int CL_API_CALL clReportUnfreedIntel(void); typedef CL_API_ENTRY cl_int (CL_API_CALL *clReportUnfreedIntel_fn)(void); /* 1 to 1 mapping of drm_intel_bo_map */ extern CL_API_ENTRY void* CL_API_CALL clMapBufferIntel(cl_mem, cl_int*); typedef CL_API_ENTRY void* (CL_API_CALL *clMapBufferIntel_fn)(cl_mem, cl_int*); /* 1 to 1 mapping of drm_intel_bo_unmap */ extern CL_API_ENTRY cl_int CL_API_CALL clUnmapBufferIntel(cl_mem); typedef CL_API_ENTRY cl_int (CL_API_CALL *clUnmapBufferIntel_fn)(cl_mem); /* 1 to 1 mapping of drm_intel_gem_bo_map_gtt */ extern CL_API_ENTRY void* CL_API_CALL clMapBufferGTTIntel(cl_mem, cl_int*); typedef CL_API_ENTRY void* (CL_API_CALL *clMapBufferGTTIntel_fn)(cl_mem, cl_int*); /* 1 to 1 mapping of drm_intel_gem_bo_unmap_gtt */ extern CL_API_ENTRY cl_int CL_API_CALL clUnmapBufferGTTIntel(cl_mem); typedef CL_API_ENTRY cl_int (CL_API_CALL *clUnmapBufferGTTIntel_fn)(cl_mem); /* Pin /Unpin the buffer in GPU memory (must be root) */ extern CL_API_ENTRY cl_int CL_API_CALL clPinBufferIntel(cl_mem); extern CL_API_ENTRY cl_int CL_API_CALL clUnpinBufferIntel(cl_mem); typedef CL_API_ENTRY cl_int (CL_API_CALL *clPinBufferIntel_fn)(cl_mem); typedef CL_API_ENTRY cl_int (CL_API_CALL *clUnpinBufferIntel_fn)(cl_mem); /* Get the generation of the Gen device (used to load the proper binary) */ extern CL_API_ENTRY cl_int CL_API_CALL clGetGenVersionIntel(cl_device_id device, cl_int *ver); typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGenVersionIntel_fn)( cl_device_id device, cl_int *ver); /* Create a program from a LLVM source file */ extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithLLVMIntel(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, 
const char * /* file */, cl_int * /* errcode_ret */); typedef CL_API_ENTRY cl_program (CL_API_CALL *clCreateProgramWithLLVMIntel_fn)( cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* file */, cl_int * /* errcode_ret */); /* Create buffer from libva's buffer object */ extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBufferFromLibvaIntel(cl_context /* context */, unsigned int /* bo_name */, cl_int * /* errcode_ret */); typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateBufferFromLibvaIntel_fn)( cl_context /* context */, unsigned int /* bo_name */, cl_int * /* errcode_ret */); /* Create image from libva's buffer object */ typedef struct _cl_libva_image { unsigned int bo_name; uint32_t offset; uint32_t width; uint32_t height; cl_image_format fmt; uint32_t row_pitch; uint32_t reserved[8]; } cl_libva_image; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImageFromLibvaIntel(cl_context /* context */, const cl_libva_image * /* info */, cl_int * /* errcode_ret */); typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateImageFromLibvaIntel_fn)( cl_context /* context */, const cl_libva_image * /* info */, cl_int * /* errcode_ret */); #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_INTEL_H */ Release_v0.3/include/CL/cl_platform.h000066400000000000000000001122241223142177000176010ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
**********************************************************************************/ /* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */ #ifndef __CL_PLATFORM_H #define __CL_PLATFORM_H #ifdef __APPLE__ /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ #include #endif #ifdef __cplusplus extern "C" { #endif #if defined(_WIN32) #define CL_API_ENTRY #define CL_API_CALL __stdcall #define CL_CALLBACK __stdcall #else #define CL_API_ENTRY #define CL_API_CALL #define CL_CALLBACK #endif #ifdef __APPLE__ #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #else #define CL_EXTENSION_WEAK_LINK #define CL_API_SUFFIX__VERSION_1_0 #define CL_EXT_SUFFIX__VERSION_1_0 #define CL_API_SUFFIX__VERSION_1_1 #define CL_EXT_SUFFIX__VERSION_1_1 #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED #endif #if (defined (_WIN32) && defined(_MSC_VER)) /* scalar types */ typedef signed __int8 cl_char; typedef unsigned __int8 cl_uchar; typedef signed __int16 cl_short; typedef unsigned __int16 cl_ushort; typedef signed __int32 cl_int; typedef unsigned __int32 cl_uint; typedef signed __int64 cl_long; typedef unsigned __int64 cl_ulong; typedef unsigned __int16 cl_half; typedef float cl_float; typedef double cl_double; /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 
2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 340282346638528859811704183484516925440.0f #define CL_FLT_MIN 1.175494350822287507969e-38f #define CL_FLT_EPSILON 0x1.0p-23f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 #define CL_DBL_MIN 2.225073858507201383090e-308 #define CL_DBL_EPSILON 2.220446049250313080847e-16 #define CL_M_E 2.718281828459045090796 #define CL_M_LOG2E 1.442695040888963387005 #define CL_M_LOG10E 0.434294481903251816668 #define CL_M_LN2 0.693147180559945286227 #define CL_M_LN10 2.302585092994045901094 #define CL_M_PI 3.141592653589793115998 #define CL_M_PI_2 1.570796326794896557999 #define CL_M_PI_4 0.785398163397448278999 #define CL_M_1_PI 0.318309886183790691216 #define CL_M_2_PI 0.636619772367581382433 #define CL_M_2_SQRTPI 1.128379167095512558561 #define CL_M_SQRT2 1.414213562373095145475 #define CL_M_SQRT1_2 0.707106781186547572737 #define CL_M_E_F 2.71828174591064f #define CL_M_LOG2E_F 1.44269502162933f #define CL_M_LOG10E_F 0.43429449200630f #define CL_M_LN2_F 0.69314718246460f #define CL_M_LN10_F 2.30258512496948f #define CL_M_PI_F 3.14159274101257f #define CL_M_PI_2_F 
1.57079637050629f #define CL_M_PI_4_F 0.78539818525314f #define CL_M_1_PI_F 0.31830987334251f #define CL_M_2_PI_F 0.63661974668503f #define CL_M_2_SQRTPI_F 1.12837922573090f #define CL_M_SQRT2_F 1.41421353816986f #define CL_M_SQRT1_2_F 0.70710676908493f #define CL_NAN (CL_INFINITY - CL_INFINITY) #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #else #include /* scalar types */ typedef int8_t cl_char; typedef uint8_t cl_uchar; typedef int16_t cl_short __attribute__((aligned(2))); typedef uint16_t cl_ushort __attribute__((aligned(2))); typedef int32_t cl_int __attribute__((aligned(4))); typedef uint32_t cl_uint __attribute__((aligned(4))); typedef int64_t cl_long __attribute__((aligned(8))); typedef uint64_t cl_ulong __attribute__((aligned(8))); typedef uint16_t cl_half __attribute__((aligned(2))); typedef float cl_float __attribute__((aligned(4))); typedef double cl_double __attribute__((aligned(8))); /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 0x1.fffffep127f #define CL_FLT_MIN 0x1.0p-126f #define CL_FLT_EPSILON 0x1.0p-23f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define 
CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 0x1.fffffffffffffp1023 #define CL_DBL_MIN 0x1.0p-1022 #define CL_DBL_EPSILON 0x1.0p-52 #define CL_M_E 2.718281828459045090796 #define CL_M_LOG2E 1.442695040888963387005 #define CL_M_LOG10E 0.434294481903251816668 #define CL_M_LN2 0.693147180559945286227 #define CL_M_LN10 2.302585092994045901094 #define CL_M_PI 3.141592653589793115998 #define CL_M_PI_2 1.570796326794896557999 #define CL_M_PI_4 0.785398163397448278999 #define CL_M_1_PI 0.318309886183790691216 #define CL_M_2_PI 0.636619772367581382433 #define CL_M_2_SQRTPI 1.128379167095512558561 #define CL_M_SQRT2 1.414213562373095145475 #define CL_M_SQRT1_2 0.707106781186547572737 #define CL_M_E_F 2.71828174591064f #define CL_M_LOG2E_F 1.44269502162933f #define CL_M_LOG10E_F 0.43429449200630f #define CL_M_LN2_F 0.69314718246460f #define CL_M_LN10_F 2.30258512496948f #define CL_M_PI_F 3.14159274101257f #define CL_M_PI_2_F 1.57079637050629f #define CL_M_PI_4_F 0.78539818525314f #define CL_M_1_PI_F 0.31830987334251f #define CL_M_2_PI_F 0.63661974668503f #define CL_M_2_SQRTPI_F 1.12837922573090f #define CL_M_SQRT2_F 1.41421353816986f #define CL_M_SQRT1_2_F 0.70710676908493f #if defined( __GNUC__ ) #define CL_HUGE_VALF __builtin_huge_valf() #define CL_HUGE_VAL __builtin_huge_val() #define CL_NAN __builtin_nanf( "" ) #else #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) float nanf( const char * ); #define CL_NAN nanf( "" ) #endif #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #endif #include /* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */ typedef unsigned int cl_GLuint; typedef int cl_GLint; typedef unsigned int cl_GLenum; /* * Vector types * * Note: OpenCL requires that all types be naturally aligned. * This means that vector types must be naturally aligned. 
* For example, a vector of four floats must be aligned to * a 16 byte boundary (calculated as 4 * the natural 4-byte * alignment of the float). The alignment qualifiers here * will only function properly if your compiler supports them * and if you don't actively work to defeat them. For example, * in order for a cl_float4 to be 16 byte aligned in a struct, * the start of the struct must itself be 16-byte aligned. * * Maintaining proper alignment is the user's responsibility. */ /* Define basic vector types */ #if defined( __VEC__ ) #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ typedef vector unsigned char __cl_uchar16; typedef vector signed char __cl_char16; typedef vector unsigned short __cl_ushort8; typedef vector signed short __cl_short8; typedef vector unsigned int __cl_uint4; typedef vector signed int __cl_int4; typedef vector float __cl_float4; #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_UINT4__ 1 #define __CL_INT4__ 1 #define __CL_FLOAT4__ 1 #endif #if defined( __SSE__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef float __cl_float4 __attribute__((vector_size(16))); #else typedef __m128 __cl_float4; #endif #define __CL_FLOAT4__ 1 #endif #if defined( __SSE2__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); typedef cl_char __cl_char16 __attribute__((vector_size(16))); typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); typedef cl_short __cl_short8 __attribute__((vector_size(16))); typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); typedef cl_int __cl_int4 __attribute__((vector_size(16))); typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); typedef cl_long __cl_long2 __attribute__((vector_size(16))); typedef cl_double __cl_double2 
__attribute__((vector_size(16))); #else typedef __m128i __cl_uchar16; typedef __m128i __cl_char16; typedef __m128i __cl_ushort8; typedef __m128i __cl_short8; typedef __m128i __cl_uint4; typedef __m128i __cl_int4; typedef __m128i __cl_ulong2; typedef __m128i __cl_long2; typedef __m128d __cl_double2; #endif #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_INT4__ 1 #define __CL_UINT4__ 1 #define __CL_ULONG2__ 1 #define __CL_LONG2__ 1 #define __CL_DOUBLE2__ 1 #endif #if defined( __MMX__ ) #include #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); typedef cl_char __cl_char8 __attribute__((vector_size(8))); typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); typedef cl_short __cl_short4 __attribute__((vector_size(8))); typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); typedef cl_int __cl_int2 __attribute__((vector_size(8))); typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); typedef cl_long __cl_long1 __attribute__((vector_size(8))); typedef cl_float __cl_float2 __attribute__((vector_size(8))); #else typedef __m64 __cl_uchar8; typedef __m64 __cl_char8; typedef __m64 __cl_ushort4; typedef __m64 __cl_short4; typedef __m64 __cl_uint2; typedef __m64 __cl_int2; typedef __m64 __cl_ulong1; typedef __m64 __cl_long1; typedef __m64 __cl_float2; #endif #define __CL_UCHAR8__ 1 #define __CL_CHAR8__ 1 #define __CL_USHORT4__ 1 #define __CL_SHORT4__ 1 #define __CL_INT2__ 1 #define __CL_UINT2__ 1 #define __CL_ULONG1__ 1 #define __CL_LONG1__ 1 #define __CL_FLOAT2__ 1 #endif #if defined( __AVX__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_float __cl_float8 __attribute__((vector_size(32))); typedef cl_double __cl_double4 __attribute__((vector_size(32))); #else typedef __m256 __cl_float8; typedef __m256d __cl_double4; #endif #define __CL_FLOAT8__ 1 #define __CL_DOUBLE4__ 1 #endif /* Define alignment keys */ #if 
defined( __GNUC__ ) #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) #elif defined( _WIN32) && (_MSC_VER) /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ /* #include */ /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ #define CL_ALIGNED(_x) #else #warning Need to implement some method to align data here #define CL_ALIGNED(_x) #endif /* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) /* .xyzw and .s0123...{f|F} are supported */ #define CL_HAS_NAMED_VECTOR_FIELDS 1 /* .hi and .lo are supported */ #define CL_HAS_HI_LO_VECTOR_FIELDS 1 #endif /* Define cl_vector types */ /* ---- cl_charn ---- */ typedef union { cl_char CL_ALIGNED(2) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y; }; __extension__ struct{ cl_char s0, s1; }; __extension__ struct{ cl_char lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2; #endif }cl_char2; typedef union { cl_char CL_ALIGNED(4) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y, z, w; }; __extension__ struct{ cl_char s0, s1, s2, s3; }; __extension__ struct{ cl_char2 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[2]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4; #endif }cl_char4; /* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ typedef cl_char4 cl_char3; typedef union { cl_char CL_ALIGNED(8) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y, z, w; }; __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_char4 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[4]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[2]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8; #endif }cl_char8; typedef union { cl_char CL_ALIGNED(16) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_char8 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[8]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[4]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8[2]; #endif #if defined( __CL_CHAR16__ ) __cl_char16 v16; #endif }cl_char16; /* ---- cl_ucharn ---- */ typedef union { cl_uchar CL_ALIGNED(2) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y; }; __extension__ struct{ cl_uchar s0, s1; }; __extension__ struct{ cl_uchar lo, hi; }; #endif #if defined( __cl_uchar2__) __cl_uchar2 v2; #endif }cl_uchar2; typedef union { cl_uchar CL_ALIGNED(4) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y, z, w; }; __extension__ struct{ cl_uchar s0, s1, s2, s3; }; __extension__ struct{ cl_uchar2 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[2]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4; #endif }cl_uchar4; /* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ typedef cl_uchar4 cl_uchar3; typedef union { cl_uchar CL_ALIGNED(8) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y, z, w; }; __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_uchar4 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[4]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4[2]; #endif #if defined( __CL_UCHAR8__ ) __cl_uchar8 v8; #endif }cl_uchar8; typedef union { cl_uchar CL_ALIGNED(16) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_uchar8 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[8]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4[4]; #endif #if defined( __CL_UCHAR8__ ) __cl_uchar8 v8[2]; #endif #if defined( __CL_UCHAR16__ ) __cl_uchar16 v16; #endif }cl_uchar16; /* ---- cl_shortn ---- */ typedef union { cl_short CL_ALIGNED(4) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y; }; __extension__ struct{ cl_short s0, s1; }; __extension__ struct{ cl_short lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2; #endif }cl_short2; typedef union { cl_short CL_ALIGNED(8) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y, z, w; }; __extension__ struct{ cl_short s0, s1, s2, s3; }; __extension__ struct{ cl_short2 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[2]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4; #endif }cl_short4; /* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ typedef cl_short4 cl_short3; typedef union { cl_short CL_ALIGNED(16) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y, z, w; }; __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_short4 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[4]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4[2]; #endif #if defined( __CL_SHORT8__ ) __cl_short8 v8; #endif }cl_short8; typedef union { cl_short CL_ALIGNED(32) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_short8 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[8]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4[4]; #endif #if defined( __CL_SHORT8__ ) __cl_short8 v8[2]; #endif #if defined( __CL_SHORT16__ ) __cl_short16 v16; #endif }cl_short16; /* ---- cl_ushortn ---- */ typedef union { cl_ushort CL_ALIGNED(4) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y; }; __extension__ struct{ cl_ushort s0, s1; }; __extension__ struct{ cl_ushort lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2; #endif }cl_ushort2; typedef union { cl_ushort CL_ALIGNED(8) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y, z, w; }; __extension__ struct{ cl_ushort s0, s1, s2, s3; }; __extension__ struct{ cl_ushort2 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[2]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4; #endif }cl_ushort4; /* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ typedef cl_ushort4 cl_ushort3; typedef union { cl_ushort CL_ALIGNED(16) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y, z, w; }; __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_ushort4 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[4]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4[2]; #endif #if defined( __CL_USHORT8__ ) __cl_ushort8 v8; #endif }cl_ushort8; typedef union { cl_ushort CL_ALIGNED(32) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_ushort8 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[8]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4[4]; #endif #if defined( __CL_USHORT8__ ) __cl_ushort8 v8[2]; #endif #if defined( __CL_USHORT16__ ) __cl_ushort16 v16; #endif }cl_ushort16; /* ---- cl_intn ---- */ typedef union { cl_int CL_ALIGNED(8) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y; }; __extension__ struct{ cl_int s0, s1; }; __extension__ struct{ cl_int lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2; #endif }cl_int2; typedef union { cl_int CL_ALIGNED(16) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y, z, w; }; __extension__ struct{ cl_int s0, s1, s2, s3; }; __extension__ struct{ cl_int2 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[2]; #endif #if defined( __CL_INT4__) __cl_int4 v4; #endif }cl_int4; /* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ typedef cl_int4 cl_int3; typedef union { cl_int CL_ALIGNED(32) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y, z, w; }; __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_int4 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[4]; #endif #if defined( __CL_INT4__) __cl_int4 v4[2]; #endif #if defined( __CL_INT8__ ) __cl_int8 v8; #endif }cl_int8; typedef union { cl_int CL_ALIGNED(64) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_int8 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[8]; #endif #if defined( __CL_INT4__) __cl_int4 v4[4]; #endif #if defined( __CL_INT8__ ) __cl_int8 v8[2]; #endif #if defined( __CL_INT16__ ) __cl_int16 v16; #endif }cl_int16; /* ---- cl_uintn ---- */ typedef union { cl_uint CL_ALIGNED(8) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y; }; __extension__ struct{ cl_uint s0, s1; }; __extension__ struct{ cl_uint lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2; #endif }cl_uint2; typedef union { cl_uint CL_ALIGNED(16) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y, z, w; }; __extension__ struct{ cl_uint s0, s1, s2, s3; }; __extension__ struct{ cl_uint2 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[2]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4; #endif }cl_uint4; /* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ typedef cl_uint4 cl_uint3; typedef union { cl_uint CL_ALIGNED(32) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y, z, w; }; __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_uint4 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[4]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4[2]; #endif #if defined( __CL_UINT8__ ) __cl_uint8 v8; #endif }cl_uint8; typedef union { cl_uint CL_ALIGNED(64) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_uint8 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[8]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4[4]; #endif #if defined( __CL_UINT8__ ) __cl_uint8 v8[2]; #endif #if defined( __CL_UINT16__ ) __cl_uint16 v16; #endif }cl_uint16; /* ---- cl_longn ---- */ typedef union { cl_long CL_ALIGNED(16) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y; }; __extension__ struct{ cl_long s0, s1; }; __extension__ struct{ cl_long lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2; #endif }cl_long2; typedef union { cl_long CL_ALIGNED(32) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y, z, w; }; __extension__ struct{ cl_long s0, s1, s2, s3; }; __extension__ struct{ cl_long2 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[2]; #endif #if defined( __CL_LONG4__) __cl_long4 v4; #endif }cl_long4; /* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ typedef cl_long4 cl_long3; typedef union { cl_long CL_ALIGNED(64) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y, z, w; }; __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_long4 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[4]; #endif #if defined( __CL_LONG4__) __cl_long4 v4[2]; #endif #if defined( __CL_LONG8__ ) __cl_long8 v8; #endif }cl_long8; typedef union { cl_long CL_ALIGNED(128) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_long8 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[8]; #endif #if defined( __CL_LONG4__) __cl_long4 v4[4]; #endif #if defined( __CL_LONG8__ ) __cl_long8 v8[2]; #endif #if defined( __CL_LONG16__ ) __cl_long16 v16; #endif }cl_long16; /* ---- cl_ulongn ---- */ typedef union { cl_ulong CL_ALIGNED(16) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y; }; __extension__ struct{ cl_ulong s0, s1; }; __extension__ struct{ cl_ulong lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2; #endif }cl_ulong2; typedef union { cl_ulong CL_ALIGNED(32) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y, z, w; }; __extension__ struct{ cl_ulong s0, s1, s2, s3; }; __extension__ struct{ cl_ulong2 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[2]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4; #endif }cl_ulong4; /* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ typedef cl_ulong4 cl_ulong3; typedef union { cl_ulong CL_ALIGNED(64) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y, z, w; }; __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_ulong4 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[4]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4[2]; #endif #if defined( __CL_ULONG8__ ) __cl_ulong8 v8; #endif }cl_ulong8; typedef union { cl_ulong CL_ALIGNED(128) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_ulong8 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[8]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4[4]; #endif #if defined( __CL_ULONG8__ ) __cl_ulong8 v8[2]; #endif #if defined( __CL_ULONG16__ ) __cl_ulong16 v16; #endif }cl_ulong16; /* --- cl_floatn ---- */ typedef union { cl_float CL_ALIGNED(8) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y; }; __extension__ struct{ cl_float s0, s1; }; __extension__ struct{ cl_float lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2; #endif }cl_float2; typedef union { cl_float CL_ALIGNED(16) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y, z, w; }; __extension__ struct{ cl_float s0, s1, s2, s3; }; __extension__ struct{ cl_float2 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[2]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4; #endif }cl_float4; /* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ typedef cl_float4 cl_float3; typedef union { cl_float CL_ALIGNED(32) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y, z, w; }; __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_float4 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[4]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4[2]; #endif #if defined( __CL_FLOAT8__ ) __cl_float8 v8; #endif }cl_float8; typedef union { cl_float CL_ALIGNED(64) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_float8 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[8]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4[4]; #endif #if defined( __CL_FLOAT8__ ) __cl_float8 v8[2]; #endif #if defined( __CL_FLOAT16__ ) __cl_float16 v16; #endif }cl_float16; /* --- cl_doublen ---- */ typedef union { cl_double CL_ALIGNED(16) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y; }; __extension__ struct{ cl_double s0, s1; }; __extension__ struct{ cl_double lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2; #endif }cl_double2; typedef union { cl_double CL_ALIGNED(32) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y, z, w; }; __extension__ struct{ cl_double s0, s1, s2, s3; }; __extension__ struct{ cl_double2 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[2]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4; #endif }cl_double4; /* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ typedef cl_double4 cl_double3; typedef union { cl_double CL_ALIGNED(64) s[8]; #if defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y, z, w; }; __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_double4 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[4]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4[2]; #endif #if defined( __CL_DOUBLE8__ ) __cl_double8 v8; #endif }cl_double8; typedef union { cl_double CL_ALIGNED(128) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_double8 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[8]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4[4]; #endif #if defined( __CL_DOUBLE8__ ) __cl_double8 v8[2]; #endif #if defined( __CL_DOUBLE16__ ) __cl_double16 v16; #endif }cl_double16; /* Macro to facilitate debugging * Usage: * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. * The first line ends with: CL_PROGRAM_STRING_BEGIN \" * Each line thereafter of OpenCL C source must end with: \n\ * The last line ends in "; * * Example: * * const char *my_program = CL_PROGRAM_STRING_BEGIN "\ * kernel void foo( int a, float * b ) \n\ * { \n\ * // my comment \n\ * *b[ get_global_id(0)] = a; \n\ * } \n\ * "; * * This should correctly set up the line, (column) and file information for your source * string so you can do source level debugging. 
*/ #define __CL_STRINGIFY( _x ) # _x #define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) #define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" #ifdef __cplusplus } #endif #endif /* __CL_PLATFORM_H */ Release_v0.3/include/CL/opencl.h000066400000000000000000000033321223142177000165560ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
******************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_H #define __OPENCL_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #include #include #include #else #include #include #include #include #endif #ifdef __cplusplus } #endif #endif /* __OPENCL_H */ Release_v0.3/include/CMakeLists.txt000066400000000000000000000002551223142177000173700ustar00rootroot00000000000000FILE(GLOB HEADER_FILES "CL/*.h") FILE(GLOB HPP_FILES "CL/*.hpp") install (FILES ${HEADER_FILES} DESTINATION include/CL) install (FILES ${HPP_FILES} DESTINATION include/CL) Release_v0.3/kernels/000077500000000000000000000000001223142177000146465ustar00rootroot00000000000000Release_v0.3/kernels/buildin_work_dim.cl000066400000000000000000000001171223142177000205060ustar00rootroot00000000000000kernel void buildin_work_dim( __global int *ret ) { *ret = get_work_dim(); } Release_v0.3/kernels/builtin_acos_asin.cl000066400000000000000000000005221223142177000206520ustar00rootroot00000000000000__kernel void builtin_acos_asin(__global float *dst, __global float *src, __global int *max_func) { int i = get_global_id(0); float x = src[i]; dst[i * (*max_func) + 0] = acos(x); dst[i * (*max_func) + 1] = acosh(x); dst[i * (*max_func) + 2] = asin(x); dst[i * (*max_func) + 3] = asinh(x); dst[i * (*max_func) + 4] = x; }; Release_v0.3/kernels/builtin_atan2.cl000066400000000000000000000002161223142177000177200ustar00rootroot00000000000000kernel void builtin_atan2(global float *y, global float *x, global float *dst) { int i = get_global_id(0); dst[i] = atan2(y[i], x[i]); }; Release_v0.3/kernels/builtin_bitselect.cl000066400000000000000000000002761223142177000206770ustar00rootroot00000000000000kernel void builtin_bitselect(global float *src1, global float *src2, global float *src3, global float *dst) { int i = get_global_id(0); dst[i] = bitselect(src1[i], src2[i], src3[i]); } 
Release_v0.3/kernels/builtin_convert_sat.cl000066400000000000000000000017061223142177000212470ustar00rootroot00000000000000
/*
 * DEF(DSTTYPE, SRCTYPE) emits the kernel
 *   builtin_convert_<SRCTYPE>_to_<DSTTYPE>_sat
 * which stores convert_<DSTTYPE>_sat(src[i]) into dst[i] for each work-item i.
 */
#define DEF(DSTTYPE, SRCTYPE) \
  kernel void builtin_convert_ ## SRCTYPE ## _to_ ## DSTTYPE ## _sat(global SRCTYPE *src, global DSTTYPE *dst) { \
    int i = get_global_id(0); \
    dst[i] = convert_ ## DSTTYPE ## _sat(src[i]); \
  }

DEF(char, uchar);
DEF(char, short);
DEF(char, ushort);
DEF(char, int);
DEF(char, uint);
DEF(char, long);
DEF(char, ulong);
DEF(char, float);
DEF(uchar, char);
DEF(uchar, short);
DEF(uchar, ushort);
DEF(uchar, int);
DEF(uchar, uint);
DEF(uchar, long);
DEF(uchar, ulong);
DEF(uchar, float);
DEF(short, ushort);
DEF(short, int);
DEF(short, uint);
DEF(short, long);
DEF(short, ulong);
DEF(short, float);
DEF(ushort, short);
DEF(ushort, int);
DEF(ushort, uint);
DEF(ushort, long);
DEF(ushort, ulong);
DEF(ushort, float);
DEF(int, uint);
DEF(int, long);
DEF(int, ulong);
DEF(int, float);
DEF(uint, int);
DEF(uint, long);
DEF(uint, ulong);
DEF(uint, float);
DEF(long, ulong);
DEF(long, float);
DEF(ulong, long);
DEF(ulong, float);
#undef DEF
Release_v0.3/kernels/builtin_frexp.cl000066400000000000000000000002201223142177000200320ustar00rootroot00000000000000
/* dst[i] receives the value returned by frexp(src[i], ...); e[i] receives the exponent it writes. */
kernel void builtin_frexp(global float *src, global float *dst, global int *e)
{
  int i = get_global_id(0);
  dst[i] = frexp(src[i], &e[i]);
}
Release_v0.3/kernels/builtin_global_id.cl000066400000000000000000000002171223142177000206300ustar00rootroot00000000000000
/*
 * Flattens the 3-D global id with strides 1, 3 and 12 and stores that index
 * at its own position in ret.
 * NOTE(review): the strides suggest a 3 x 4 x N launch -- confirm against the host test.
 */
kernel void builtin_global_id( __global int *ret)
{
  int id = get_global_id(0) + get_global_id(1)*3 + get_global_id(2)*3*4;
  ret[id] = id;
}
Release_v0.3/kernels/builtin_global_size.cl000066400000000000000000000001611223142177000212040ustar00rootroot00000000000000
/* Stores get_global_size() of dimension *i_dim into *ret. */
kernel void builtin_global_size( __global int *ret, __global int *i_dim )
{
  *ret = get_global_size( *i_dim);
}
Release_v0.3/kernels/builtin_lgamma.cl000066400000000000000000000001751223142177000201550ustar00rootroot00000000000000
/* dst[i] = lgamma(src[i]) for each work-item i. */
kernel void builtin_lgamma(global float *src, global float *dst)
{
  int i = get_global_id(0);
  dst[i] = lgamma(src[i]);
}
Release_v0.3/kernels/builtin_lgamma_r.cl000066400000000000000000000002351223142177000204730ustar00rootroot00000000000000
/* dst[i] = lgamma_r(src[i], &signp[i]); lgamma_r writes its sign output through the pointer. */
kernel void builtin_lgamma_r(global float *src, global float *dst, global int *signp)
{
  int i = get_global_id(0);
  dst[i] = lgamma_r(src[i], signp+i);
}
Release_v0.3/kernels/builtin_local_id.cl000066400000000000000000000003551223142177000204650ustar00rootroot00000000000000
/*
 * Combines local and group ids of all three dimensions into one index
 * (weights 1/2, 4/12, 36/144) and stores that index at its own position.
 * NOTE(review): the weights presumably match the host-side local/global sizes -- confirm.
 */
kernel void builtin_local_id( __global int *ret)
{
  int id = get_local_id(0) + get_group_id(0) * 2 +
           get_local_id(1) * 4 + get_group_id(1) * 12 +
           get_local_id(2) * 36 + get_group_id(2) * 144;
  ret[id] = id;
}
Release_v0.3/kernels/builtin_local_size.cl000066400000000000000000000001571223142177000210430ustar00rootroot00000000000000
/* Stores get_local_size() of dimension *i_dim into *ret. */
kernel void builtin_local_size( __global int *ret, __global int *i_dim )
{
  *ret = get_local_size( *i_dim);
}
Release_v0.3/kernels/builtin_mad_sat.cl000066400000000000000000000002741223142177000203270ustar00rootroot00000000000000
/* dst[i] = mad_sat(src1[i], src2[i], src3[i]) on short operands. */
kernel void builtin_mad_sat(global short *src1, global short *src2, global short *src3, global short *dst)
{
  /* The index is an int, as in the sibling kernels: a short index would
   * truncate get_global_id(0) once the launch exceeds 32767 work-items. */
  int i = get_global_id(0);
  dst[i] = mad_sat(src1[i], src2[i], src3[i]);
}
Release_v0.3/kernels/builtin_modf.cl000066400000000000000000000002461223142177000176430ustar00rootroot00000000000000
/* dst[i] receives the value returned by modf(src[i], ...); it[i] receives the value modf writes through its pointer. */
kernel void builtin_modf(global float *src, global float *dst, global float *it)
{
  int i = get_global_id(0);
  float x;
  dst[i] = modf(src[i], &x);
  it[i] = x;
}
Release_v0.3/kernels/builtin_nextafter.cl000066400000000000000000000002411223142177000207110ustar00rootroot00000000000000
/* dst[i] = nextafter(src1[i], src2[i]) for each work-item i. */
kernel void builtin_nextafter(global float *src1, global float *src2, global float *dst)
{
  int i = get_global_id(0);
  dst[i] = nextafter(src1[i], src2[i]);
}
Release_v0.3/kernels/builtin_num_groups.cl000066400000000000000000000001571223142177000211150ustar00rootroot00000000000000
/* Stores get_num_groups() of dimension *i_dim into *ret. */
kernel void builtin_num_groups( __global int *ret, __global int *i_dim )
{
  *ret = get_num_groups( *i_dim);
}
Release_v0.3/kernels/builtin_remquo.cl000066400000000000000000000002741223142177000202270ustar00rootroot00000000000000
/* Per work-item: dst gets the remquo() result, quo gets the int it writes through its pointer. */
kernel void builtin_remquo(global float *x, global float *y, global float *dst, global int *quo)
{
  int gid = get_global_id(0);
  int quotient;
  dst[gid] = remquo(x[gid], y[gid], &quotient);
  quo[gid] = quotient;
}
Release_v0.3/kernels/builtin_shuffle.cl000066400000000000000000000004401223142177000203460ustar00rootroot00000000000000
/* Packs (src1, src2) into a float2, shuffles it with mask (1, 0), and writes the two lanes out. */
kernel void builtin_shuffle(global float *src1, global float *src2, global float *dst1, global float *dst2)
{
  int gid = get_global_id(0);
  float2 packed = (float2)(src1[gid], src2[gid]);
  uint2 lanes = (uint2)(1, 0);
  float2 shuffled = shuffle(packed, lanes);
  dst1[gid] = shuffled.s0;
  dst2[gid] = shuffled.s1;
}
Release_v0.3/kernels/builtin_shuffle2.cl000066400000000000000000000010641223142177000204330ustar00rootroot00000000000000
/*
 * Runs shuffle2 twice -- once on float2 inputs producing a float4, once on
 * float16 inputs producing a float16 -- and writes the sums of lanes 0 and 1
 * of the two results.
 */
kernel void builtin_shuffle2(global float *src1, global float *src2, global float *dst1, global float *dst2)
{
  int gid = get_global_id(0);

  /* Narrow case: float2 sources, uint4 mask. */
  float2 a = (float2)(src1[gid], src2[gid]);
  float2 b = (float2)(1234, 5678);
  uint4 narrowMask = (uint4)(1, 0, 0, 0);
  float4 narrow = shuffle2(a, b, narrowMask);

  /* Wide case: float16 sources, uint16 mask. */
  float16 wideA = 0;
  float16 wideB = (float16)(src1[gid], src2[gid], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  uint16 wideMask = (uint16)(17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  float16 wide = shuffle2(wideA, wideB, wideMask);

  dst1[gid] = narrow.s0 + wide.s0;
  dst2[gid] = narrow.s1 + wide.s1;
}
Release_v0.3/kernels/builtin_sign.cl000066400000000000000000000001701223142177000176520ustar00rootroot00000000000000
/* dst[gid] = sign(src[gid]). */
kernel void builtin_sign(global float *src, global float *dst)
{
  int gid = get_global_id(0);
  dst[gid] = sign(src[gid]);
}
Release_v0.3/kernels/builtin_sinpi.cl000066400000000000000000000001731223142177000200370ustar00rootroot00000000000000
/* dst[gid] = sinpi(src[gid]). */
kernel void builtin_sinpi(global float *src, global float *dst)
{
  int gid = get_global_id(0);
  dst[gid] = sinpi(src[gid]);
};
Release_v0.3/kernels/builtin_tgamma.cl000066400000000000000000000001751223142177000201650ustar00rootroot00000000000000
/* dst[gid] = tgamma(src[gid]). */
kernel void builtin_tgamma(global float *src, global float *dst)
{
  int gid = get_global_id(0);
  dst[gid] = tgamma(src[gid]);
};
Release_v0.3/kernels/compiler_abs.cl000066400000000000000000000015651223142177000176340ustar00rootroot00000000000000
/* Scalar variant: kernel compiler_abs_<TYPE> writes abs(src[gid]) into dst[gid]. */
#define COMPILER_ABS_FUNC_1(TYPE, UTYPE) \
    kernel void compiler_abs_##TYPE ( \
           global TYPE* src, global UTYPE* dst) { \
        int gid = get_global_id(0); \
        dst[gid] = abs(src[gid]); \
    }

/* Vector variant: same as above for the N-wide vector types TYPE##N / UTYPE##N. */
#define COMPILER_ABS_FUNC_N(TYPE, UTYPE, N) \
    kernel void compiler_abs_##TYPE##N ( \
           global TYPE##N* src, global UTYPE##N* dst) { \
        int gid = get_global_id(0); \
        dst[gid] = abs(src[gid]); \
    }

/* Emits the scalar kernel plus the 2-, 3-, 4-, 8- and 16-wide kernels for one type pair. */
#define COMPILER_ABS(TYPE, UTYPE) \
    COMPILER_ABS_FUNC_1(TYPE, UTYPE) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 2) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 3) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 4) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 8) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 16)

COMPILER_ABS(int, uint)
COMPILER_ABS(uint, uint)
COMPILER_ABS(char, uchar)
COMPILER_ABS(uchar, uchar)
COMPILER_ABS(short, ushort)
COMPILER_ABS(ushort, ushort)
Release_v0.3/kernels/compiler_abs_diff.cl000066400000000000000000000017511223142177000206210ustar00rootroot00000000000000
/* Scalar variant: kernel compiler_abs_diff_<TYPE> writes abs_diff(x[gid], y[gid]) into diff[gid]. */
#define COMPILER_ABS_FUNC_1(TYPE, UTYPE) \
    kernel void compiler_abs_diff_##TYPE ( \
           global TYPE* x, global TYPE* y, global UTYPE* diff) { \
        int gid = get_global_id(0); \
        diff[gid] = abs_diff(x[gid], y[gid]); \
    }

/* Vector variant: same as above for the N-wide vector types TYPE##N / UTYPE##N. */
#define COMPILER_ABS_FUNC_N(TYPE, UTYPE, N) \
    kernel void compiler_abs_diff_##TYPE##N ( \
           global TYPE##N* x, global TYPE##N* y, global UTYPE##N* diff) { \
        int gid = get_global_id(0); \
        diff[gid] = abs_diff(x[gid], y[gid]); \
    }

/* Emits the scalar kernel plus the 2-, 3-, 4-, 8- and 16-wide kernels for one type pair. */
#define COMPILER_ABS(TYPE, UTYPE) \
    COMPILER_ABS_FUNC_1(TYPE, UTYPE) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 2) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 3) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 4) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 8) \
    COMPILER_ABS_FUNC_N(TYPE, UTYPE, 16)

COMPILER_ABS(int, uint)
COMPILER_ABS(uint, uint)
COMPILER_ABS(char, uchar)
COMPILER_ABS(uchar, uchar)
COMPILER_ABS(short, ushort)
COMPILER_ABS(ushort, ushort)
COMPILER_ABS(long, ulong)
COMPILER_ABS(ulong, ulong)
Release_v0.3/kernels/compiler_address_space.cl000066400000000000000000000004641223142177000216640ustar00rootroot00000000000000/* test OpenCL 1.1 Address Space Qualifiers (section 6.5) */ __constant float cf1[] = {1, 2, 3}; constant float cf2[] = {4, 5, 6}; __kernel void compiler_address_space(__global float *gf1, global float *gf2) { __local float lf1[4]; local float lf2[4]; __private float pf1[4]; private float pf2[4]; } Release_v0.3/kernels/compiler_argument_structure.cl000066400000000000000000000002441223142177000230220ustar00rootroot00000000000000struct hop { int x, y; }; __kernel void compiler_argument_structure(__global int *dst, struct hop h) { int id = (int)get_global_id(0); dst[id] = h.x + h.y; } Release_v0.3/kernels/compiler_argument_structure_indirect.cl000066400000000000000000000002601223142177000247010ustar00rootroot00000000000000struct hop { int x[16]; }; __kernel void compiler_argument_structure(__global int *dst, struct hop h) { int id = (int)get_global_id(0); dst[id] = h.x[get_local_id(0)]; } Release_v0.3/kernels/compiler_arith_shift_right.cl000066400000000000000000000002061223142177000225570ustar00rootroot00000000000000kernel void compiler_arith_shift_right(global int *src, global int *dst) { int i = get_global_id(0); dst[i] = src[i] >> 24; } Release_v0.3/kernels/compiler_array.cl000066400000000000000000000004401223142177000201740ustar00rootroot00000000000000__kernel void compiler_array(__global int *src, __global int *dst) { int array[16]; int i; for (i = 0; i < 16; ++i) { if (src[0] > 10) array[i] = get_local_id(0); else array[15 - i] = 3 + get_local_id(1); } dst[get_global_id(0)] = array[get_local_id(0)]; } Release_v0.3/kernels/compiler_array0.cl000066400000000000000000000005421223142177000202570ustar00rootroot00000000000000__kernel void compiler_array0(__global int *src, __global int *dst) { int i; int final[16]; for (i = 0; i < 16; ++i) { int array[16], j; for (j = 0; j < 16; ++j) array[j] = get_global_id(0); for (j = 0; j < src[0]; ++j) 
array[j] = 1+src[j]; final[i] = array[i]; } dst[get_global_id(0)] = final[get_global_id(0)]; } Release_v0.3/kernels/compiler_array1.cl000066400000000000000000000005471223142177000202650ustar00rootroot00000000000000__kernel void compiler_array1(__global int *src, __global int *dst) { int final[16]; for (int i = 0; i < 16; ++i) { int array[16]; for (int j = 0; j < src[0]; ++j) array[j] = 1+src[0]; for (int j = src[0]; j < 16; ++j) array[j] = get_global_id(0); final[i] = array[i]; } dst[get_global_id(0)] = final[get_global_id(0)]; } Release_v0.3/kernels/compiler_array2.cl000066400000000000000000000005331223142177000202610ustar00rootroot00000000000000__kernel void compiler_array2(__global int *src, __global int *dst) { int final[16]; int array[16]; for (int j = 0; j < 16; ++j) array[j] = j; for (int j = 0; j < 16; ++j) final[j] = j+1; if (get_global_id(0) == 15) dst[get_global_id(0)] = final[get_global_id(0)]; else dst[get_global_id(0)] = array[15 - get_global_id(0)]; } Release_v0.3/kernels/compiler_array3.cl000066400000000000000000000005111223142177000202560ustar00rootroot00000000000000__kernel void compiler_array3(__global int *src, __global int *dst) { int tmp[32]; for (int i = 0; i < 16; ++i) { for (int j = 0; j < 16; ++j) tmp[j] = get_global_id(0); for (int j = 0; j < src[0]; ++j) tmp[j] = 1+src[j]; tmp[16+i] = tmp[i]; } dst[get_global_id(0)] = tmp[16+get_global_id(0)]; } Release_v0.3/kernels/compiler_async_copy.cl000066400000000000000000000014331223142177000212300ustar00rootroot00000000000000#define DEF(TYPE) \ kernel void \ compiler_async_copy_##TYPE(__global TYPE *dst, __global TYPE *src, __local TYPE *localBuffer, int copiesPerWorkItem) \ { \ event_t event; \ int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); \ int i; \ event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 ); \ wait_group_events( 1, &event ); \ \ event = 
async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 ); \ wait_group_events( 1, &event ); \ } DEF(char2); DEF(uchar2); DEF(short2); DEF(ushort2); DEF(int2); DEF(uint2); DEF(long2); DEF(ulong2); DEF(float2); DEF(double2); Release_v0.3/kernels/compiler_async_copy_and_prefetch.cl000066400000000000000000000004571223142177000237370ustar00rootroot00000000000000/* test OpenCL 1.1 Async Copies and Prefetch Functions (section 6.11.10) */ kernel void compiler_async_copy_and_prefetch(__global float *p) { prefetch(p, 10); local float l[10]; event_t e[2]; async_work_group_copy(l, p, 10, 0); async_work_group_copy(p, l, 10, 0); wait_group_events(2, e); } Release_v0.3/kernels/compiler_async_stride_copy.cl000066400000000000000000000016121223142177000226010ustar00rootroot00000000000000__kernel void compiler_async_stride_copy(__global char4 *dst, __global char4 *src, __local char4 *localBuffer, int copiesPerWorkItem, int stride) { event_t event; int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); int i; event = async_work_group_strided_copy( (__local char4*)localBuffer, (__global const char4*)(src+copiesPerWorkgroup*stride*get_group_id(0)), (size_t)copiesPerWorkgroup, (size_t)stride, (event_t)0 ); wait_group_events( 1, &event ); for(i=0; i>= 8; u3.y = (float) (u & 0xff); u >>= 8; u3.z = (float) (u & 0xff); return u3; } inline uint pack_fp3(float3 u3) { uint u; u = (((uint) u3.x)) | (((uint) u3.y) << 8) | (((uint) u3.z) << 16); return u; } #define HFILTER3(C0, C1, C2, C3, CURR, LEFT, RIGHT)\ float3 C0, C1, C2, C3;\ do {\ const uint4 from = vload4(CURR, src);\ const float3 from0 = unpack_fp3(from.x);\ const float3 from1 = unpack_fp3(from.y);\ const float3 from2 = unpack_fp3(from.z);\ const float3 from3 = unpack_fp3(from.w);\ const float3 l = unpack_fp3(src[LEFT]);\ const float3 r = unpack_fp3(src[RIGHT]);\ C0 = (l+from0+from1);\ C1 = (from0+from1+from2);\ C2 = 
(from1+from2+from3);\ C3 = (from2+from3+r);\ } while(0) __kernel void compiler_box_blur(__global const uint *src, __global uint *dst, int w, int h, int chunk) { const int x = get_global_id(0); int y = get_global_id(1)*chunk; const int yend = min(y + chunk, h); /* we process a tile in the image */ /* Current line (left (1 pixel), center (4 pixels), right (1 pixel)) */ const int left = max(4*x-1, 0) + y*w; const int right = min(4*x+4, w-1) + y*w; int curr = x + y*(w>>2); HFILTER3(curr0, curr1, curr2, curr3, curr, left, right); /* Top line (left (1 pixel), center (4 pixels), right (1 pixel)) */ const int ytop = max(y-1,0); const int topLeft = max(4*x-1, 0) + ytop*w; const int topRight = min(4*x+4, w-1) + ytop*w; const int top = x + ytop*(w>>2); HFILTER3(top0, top1, top2, top3, top, topLeft, topRight); /* To guard bottom line */ const int maxBottom = x + (h-1)*(w>>2); const int maxBottomLeft = max(4*x-1,0) + (h-1)*w; const int maxBottomRight = min(4*x+4,w-1) + (h-1)*w; /* We use a short 3 pixel sliding window */ const int ybottom = min(y+1,h-1); int bottomLeft = max(4*x-1, 0) + ybottom*w; int bottomRight = min(4*x+4, w-1) + ybottom*w; int bottom = x + ybottom*(w>>2); /* Top down sliding window */ for (; y < yend; ++y, curr += (w>>2), bottom += (w>>2), bottomLeft += w, bottomRight += w) { const int center = min(bottom, maxBottom); const int left = min(bottomLeft, maxBottomLeft); const int right = min(bottomRight, maxBottomRight); HFILTER3(bottom0, bottom1, bottom2, bottom3, center, left, right); const float3 to0 = (top0+curr0+bottom0)*(1.f/9.f); const float3 to1 = (top1+curr1+bottom1)*(1.f/9.f); const float3 to2 = (top2+curr2+bottom2)*(1.f/9.f); const float3 to3 = (top3+curr3+bottom3)*(1.f/9.f); const uint4 to = (uint4)(pack_fp3(to0),pack_fp3(to1),pack_fp3(to2),pack_fp3(to3)); vstore4(to, curr, dst); top0 = curr0; top1 = curr1; top2 = curr2; top3 = curr3; curr0 = bottom0; curr1 = bottom1; curr2 = bottom2; curr3 = bottom3; } } 
Release_v0.3/kernels/compiler_box_blur_float.cl000066400000000000000000000034231223142177000220630ustar00rootroot00000000000000
/*
 * 3x3 box blur over an image stored as one float4 per element.
 *
 * src, dst : input / output images, indexed as x + y*w
 * w, h     : image width and height in elements
 * chunk    : number of consecutive rows handled by one work-item
 *
 * Work-item (gx, gy) handles column gx for rows [gy*chunk, min(gy*chunk+chunk, h)).
 * Neighbour coordinates are clamped to the image, so border elements are
 * averaged with replicated edge values. Each row's horizontal 3-element sum is
 * computed once and carried down through a three-row sliding window.
 */
__kernel void compiler_box_blur_float(__global const float4 *src,
                                      __global float4 *dst,
                                      int w,
                                      int h,
                                      int chunk)
{
  const int x = get_global_id(0);
  int y = get_global_id(1)*chunk;

  /* Work-items outside the image have nothing to do. Without this guard they
   * would read src out of range (and, for x >= w, write into the next row). */
  if (x >= w || y >= h)
    return;

  const int yend = min(y+chunk, h); /* we process a tile in the image */

  /* Current row: left neighbour + centre + right neighbour (one element each) */
  const int left = max(x-1,0) + y*w;
  const int right = min(x+1,w-1) + y*w;
  int curr = x + y*w;
  float4 currPixel = src[left] + src[curr] + src[right];

  /* Row above, clamped to the first row */
  const int ytop = max(y-1,0);
  const int topLeft = max(x-1,0) + ytop*w;
  const int topRight = min(x+1,w-1) + ytop*w;
  const int top = x + ytop*w;
  float4 topPixel = src[topLeft] + src[top] + src[topRight];

  /* Largest valid indices on the last row, used to clamp the row below */
  const int maxBottom = x + (h-1)*w;
  const int maxBottomLeft = max(x-1,0) + (h-1)*w;
  const int maxBottomRight = min(x+1,w-1) + (h-1)*w;

  /* Row below: third row of the sliding window */
  const int ybottom = min(y+1,h-1);
  int bottomLeft = max(x-1 + ybottom*w, ybottom*w);
  int bottomRight = min(x+1 + ybottom*w, ybottom*w+w-1);
  int bottom = x + ybottom*w;

  /* Top-down sliding window: every index advances by one row per iteration */
  for (; y < yend; ++y, curr += w, bottom += w, bottomLeft += w, bottomRight += w) {
    /* Clamp to the last row once the window runs off the bottom of the image */
    const int rowCenter = min(bottom, maxBottom);
    const int rowLeft = min(bottomLeft, maxBottomLeft);
    const int rowRight = min(bottomRight, maxBottomRight);
    const float4 bottomPixel = src[rowLeft] + src[rowCenter] + src[rowRight];

    const float4 to = (bottomPixel + currPixel + topPixel) * (1.f/9.f);
    dst[curr] = to;

    /* Slide the window down by one row */
    topPixel = currPixel;
    currPixel = bottomPixel;
  }
}
Release_v0.3/kernels/compiler_box_blur_float_ref.bmp000066400000000000000000001400661223142177000231040ustar00rootroot00000000000000BM66(NNNOOO\\\uuuqqqjjjpppIII:::000111000000111444222555888===999333000...222555555:::TTT~~~TTTFFF---...+++)))'''+++999LLL^^^kkkppp^^^MMMMMMfff}}}ҩbbbfffddd^^^VVVMMMCCC555---LLLŷXXXEEECCCGGGOOOZZZ```XXXPPPQQQ```hhheeeSSSCCC999<<<===NNNooonnnfffqqqNNNAAA222......000000444222777999>>>:::555000///...///333CCCbbb}}}sssPPPDDD...000...***((()))444FFFYYYhhhppphhhUUULLLYYYrrrśuuu```bbb```ZZZRRRHHH>>>555444PPPmmmOOOEEEJJJOOOWWW^^^[[[VVVSSS]]]dddcccUUUEEE:::***+++???hhhkkkcccwwwYYYHHH888............444333999:::>>>888333///000***222>>>[[[kkkssseee```KKKDDD333444///+++***(((...<<>>:::333000---+++555MMMccciii\\\OOOGGGDDDEEE>>>:::///---,,,)))+++666FFF[[[iiiuuuqqq\\\RRRYYYtttժ~~~NNNNNNRRRUUUWWWXXXZZZ\\\___jjjzzzŢzzz\\\TTTTTTTTTWWW\\\___]]]ZZZ______ZZZIII>>>"""###<<>>888111888HHHaaaqqq{{{ooobbb\\\jjj|||ңsss̫rrrhhhaaa^^^\\\YYYYYY^^^___```___^^^[[[###)))LLL}}}nnnVVV___uuuQQQ///---...---444555777:::;;;;;;333666222222---;;;@@@DDDAAASSSiiirrrwwwdddcccLLLMMMIIIMMMOOOLLLGGG<<<111444AAAXXXkkkyyyuuubbb\\\bbbyyyȒ{{{nnnҷyyymmmdddaaa___^^^[[[[[[___bbb```^^^ZZZ!!!,,,RRRjjjRRR___qqqOOO222///------222777999@@@===???777888555///333::::::999BBBaaavvv~~~uuubbbggg[[[YYYKKKLLLRRRRRRLLL???222222<<>>CCC<<<666222111666???UUUhhhuuu^^^UUUUUUsssӖ[[[CCCgggӿvvv___^^^dddkkkjjjaaa]]]```ddddddccc$$$444___^^^MMMbbb}}}[[[???111...---...///???]]]\\\NNN///333111///...555BBBXXXXXXVVV___vvvhhhttt{{{sssRRR<<>>000444777666333555333444222888KKK\\\VVVMMMQQQkkk}}}ٵzzz???...CCC```~~~|||ϯ___WWW___cccgggeeeaaa______bbbJJJ```~~~YYYNNNfffhhh444(((***333888:::444>>>???BBB222<<<888LLLJJJOOOIIILLLTTT```nnnxxx~~~mmmjjj```VVV>>>---333888777333666555666333777JJJ\\\VVVMMMPPPiii{{{ÍUUU000444@@@\\\www{{{}}}}}}һfffUUUXXX```dddhhhgggccc___```dddsssoooQQQMMMhhhppp:::...---444999<<<888<<<>>>DDD???DDD@@@HHHHHHHHHGGGMMMWWWhhhuuuuuuqqqppp|||bbbHHH000555888777333444444444444888IIIUUUNNNHHHOOOgggyyy|||ãnnnIII:::===
AAA[[[ttt~~~yyy}}}{{{|||ǥrrrUUUQQQ]]]```dddeeeddd^^^\\\|||zzz^^^IIIKKKjjjtttDDD444---///:::EEEKKKKKKIIIKKKDDDGGGAAAKKKHHHJJJHHHVVV^^^pppyyy{{{kkkgggeeennnXXXGGG444666888666444666666555444999GGGLLLEEEBBBPPPgggvvvyyy~~~¢tttRRR???@@@BBBEEE]]]vvv}}}vvvppptttvvvrrr͸]]]IIISSSXXX[[[]]]___[[[WWWxxxeeeNNN@@@JJJlll^^^EEE,,,+++888AAAIIINNNNNNWWWIIINNNEEERRRXXXXXXRRRVVV^^^qqq{{{}}}kkkbbbiiifffWWW>>>777555888666555666666444666999DDDBBB>>>@@@SSShhhuuuxxx}}}âpppWWWKKKKKKGGGDDDFFF^^^wwwyyypppgggmmmrrrmmmxxxĞoooOOOJJJNNNQQQTTT[[[YYYUUUttt```OOO???<<>>;;;AAAVVVkkkuuuwwwzzzȹjjjJJJEEENNNOOOGGGEEEGGGeee~~~|||uuunnnfff___dddkkkgggrrr̺gggKKKDDDHHHLLLUUUVVVUUUmmmfffYYYIII@@@:::???QQQrrr~~~wwwjjjXXX:::222999;;;555EEEKKKVVVCCCPPPWWWdddfffddd___\\\___nnnsss|||ttt___OOOKKK??????444888555666333222000444666<<<;;;===DDDXXXjjjuuuzzz}}}ƹhhhEEE:::KKKVVVTTTKKKIIIIIIhhh|||xxxvvvqqqmmmiiihhhggglllpppkkkvvv˳gggLLLCCCCCCMMMSSSXXXRRRKKKAAA777666999EEEYYYxxxtttkkkTTT===666:::@@@777@@@AAAGGGDDD]]]gggcccSSSUUUcccgggqqquuusss{{{xxxMMM<<<>>>???@@@333666333555222222222666777:::888;;;DDDWWWjjjwww}}}rrrUUU???000777MMMYYYWWWMMMIIIJJJhhh||||||wwwtttqqqoooooopppqqqsssvvvwwwwwwrrr~~~ȱeeeIII:::===HHHRRR999555111---333>>>PPPeeeyyyrrrXXX???;;;;;;DDD888===999777GGG```mmm___WWW\\\pppqqq}}}sssmmmqqqoooccc???333<<<======444666555333000111333777999;;;888>>>DDDTTTbbbpppzzzqqqVVVCCC555111111666MMMYYYYYYOOOJJJJJJcccuuuuuurrrssstttvvvyyy}}}zzzŲ```>>>222:::CCC111///---+++444CCCWWWlll|||yyypppWWW;;;;;;;;;AAA<<<;;;555000HHH^^^dddSSSTTTYYYooopppyyyjjj]]]KKK555000999999999222333333111000000444888;;;;;;999>>>AAAHHHPPP\\\eeelllttt|||vvvSSSCCC666555444222000444HHHTTTWWWNNNGGGDDDZZZyyyyyyxxxyyy|||}}}ŭgggJJJ===888......---...888III^^^sssmmmccc___VVV@@@@@@@@@FFF===<<<666333MMMXXXZZZNNNaaajjjuuusssxxxoooZZZJJJ777000222777777999444666555333111111555999:::999:::===<<<>>>CCCQQQ\\\hhhsss|||qqqTTT666888555777666666333555IIIRRRVVVMMMFFFAAAUUUvvv~~~{{{ð|||iii]]]---------000===NNNcccuuukkkWWWVVVTTTKKKIIIFFFFFF<<<;;;777777OOOQQQVVVSSSkkkrrrssspppwww
uuuxxx^^^III111111333666555888444444333222111333666:::<<<999<<<;;;<<<<<>>777555777???UUUeeeppprrrwwwlllnnndddllleeewwwEEE000///333444555111111111222333888666:::<<<===AAACCCNNNXXXcccooovvv~~~cccCCC...///222777999666333BBBKKKQQQFFFCCC>>>TTTrrr~~~666444111222AAARRRhhhxxxhhhVVVYYYaaaXXXJJJGGGBBB;;;444444DDD[[[nnntttmmmkkkgggjjjhhhmmmjjjbbb@@@000444333222000000222333666999888888999>>>EEENNN[[[fffooovvv|||uuuVVV555...---333333333///@@@GGGPPPFFFEEE???UUUppp:::888666666???MMMbbbvvvjjjVVV\\\fff___JJJNNNFFF@@@888===SSSkkkxxxyyyeeegggmmmsssoooqqqttt~~~XXX===555444222111111333444999:::<<<;;;>>>BBBLLLXXXeeemmmsssxxx~~~hhhEEE333...222444444000AAAGGGRRRDDDAAA999TTTooo999999777:::>>>JJJ[[[vvvqqqYYY[[[fffeeeQQQZZZOOOGGG;;;BBBRRRhhhtttuuu]]][[[dddjjjuuu|||oooKKK333222///111000333444;;;<<<>>>>>>AAAGGGQQQbbbmmmtttwww{{{}}}[[[===///222222444111FFFKKKVVVFFF???666SSSooo999:::;;;===???IIIYYYvvv{{{aaaaaagggppp^^^ZZZFFFBBB<<>>AAAIII[[[xxxzzziiiaaaiiisssiii___DDDAAA;;;???AAA]]]fffrrr]]]\\\VVVQQQYYYnnn~~~zzzkkkRRR===///333222555444<<<:::???AAAIIISSS___rrryyy}}}~~~wwwlllmmmsssuuuyyy~~~xxxXXX;;;000///333555GGGLLLRRRGGG>>>444RRRppp<<<===@@@AAACCCHHH[[[wwwwwwgggdddiiiwwwkkkYYYBBBAAABBB<<<===VVVaaakkkWWWUUURRRQQQZZZbbbnnntttaaaFFF000333222222333<<<:::AAAEEEQQQZZZeeexxx|||wwwoooqqqxxx{{{~~~dddGGG111///333777@@@GGGKKKFFF<<<111PPPppp@@@???BBBAAADDDGGG\\\xxxmmm]]]^^^iiiwwwfff\\\KKKKKKIII<<<===NNN[[[```UUULLLRRRUUUhhh]]]```kkkyyyjjjZZZ@@@333222000333<<<888BBBJJJXXX```hhhyyy|||zzzuuuzzzsssVVV;;;000333777>>>EEEFFFEEE;;;222QQQoooCCCDDDDDDDDDCCCGGG\\\wwwmmmWWWYYYhhhwwwfffRRRNNNKKKPPP===@@@IIIRRRVVVMMMLLLWWWgggyyybbbTTTMMMlllppp]]]JJJ888111///444:::777@@@QQQbbbfffkkkzzz~~~~~~bbbEEE///111444;;;CCCDDDBBB777222TTTnnnFFFGGGGGGEEEBBBDDDXXXtttmmmXXXWWWgggxxxjjjXXXJJJJJJLLL@@@@@@@@@HHHNNNTTTZZZmmmvvv}}}aaaPPPCCCLLLhhhyyylll]]]GGG222222999;;;888AAARRRdddhhhnnnyyy}}}pppSSS666000333;;;AAACCC@@@777666WWWlllzzzIIIKKKKKKHHHCCCFFFXXXtttnnn[[[VVVeeessslllVVVFFFCCCQQQGGGIII???IIIQQQZZZhhhvvvttthhh[[[RRRCCC:::NNNnnn|||uuulll
___BBB111888999555BBBQQQeeeiiirrrzzz|||}}}]]]>>>///222888@@@CCC???555<<<___ppptttqqqMMMNNNNNNKKKDDDFFFVVVsssnnnZZZUUU^^^mmmiiiYYYGGGDDDLLLLLLLLLFFFLLLOOO```iiiyyygggSSSSSSUUUOOO777???UUUkkkzzzvvvyyy|||qqqQQQ444777999777FFFQQQdddgggtttyyy{{{nnnNNN666000555<<>>---111:::CCC@@@555JJJiii|||aaa```qqqRRRRRRRRRLLLEEE===GGGaaarrrccc\\\aaaiiicccTTTJJJQQQPPPLLLQQQYYYYYYOOOccc]]]]]]===<<>>HHH[[[^^^nnnwww{{{wwwwwwoooxxxrrrTTT666***555AAA@@@555PPPqqq{{{UUUEEE]]]QQQOOOMMMHHH@@@555;;;TTTyyypppaaa```gggppprrrdddYYYYYY]]]TTTOOOIIISSSPPPiiikkk\\\???///------///777NNN[[[]]]DDD111777TTTssswwwIII''';;;DDDXXX[[[nnnyyy~~~aaaBBB---444@@@<<<666UUUxxxyyyNNN999UUUwwwMMMNNNMMMJJJAAA777<<>>XXX}}}kkkUUUZZZkkk{{{nnnaaaWWW```[[[MMMKKKJJJZZZ^^^]]]FFF<<<000...)))999AAAMMMTTTkkkccchhhFFFCCCFFFiiiPPP:::>>>SSS\\\nnn|||~~~}}}¶rrrUUU<<<555@@@;;;>>>YYY{{{nnnJJJ111JJJeeeKKKKKKHHHGGGAAA:::@@@[[[kkkVVVZZZiiiuuu}}}jjjdddWWW```WWWQQQJJJDDDUUU___```CCC999222111+++:::@@@@@@HHHdddrrruuuQQQ===777RRRrrrIII:::OOO\\\ppp|||³vvv\\\BBB333>>>;;;GGG\\\|||hhhJJJ///AAA\\\zzzKKKJJJGGGFFFBBB<<>>;;;PPPaaa|||aaaHHH222<<>>111555KKKhhhUUURRRQQQNNNKKKDDDIII___lllYYYWWWdddmmmtttsssooojjjhhhXXXMMMEEEMMM[[[___aaaPPP???111...888>>>IIIJJJKKKWWW\\\RRR888******777QQQuuummmRRRZZZqqq|||IJiiiMMM888999666UUUpppyyyTTT333///111DDDaaaZZZXXXTTTOOOJJJDDDIII```lllZZZXXXgggnnnrrrsssuuusssiii[[[LLLBBBHHHUUUbbb]]]TTT???555...:::IIIZZZ\\\WWWSSSMMM@@@111,,,,,,---???YYYpppeeennn}}}Ƴ}}}hhhQQQ;;;777222TTTtttxxxSSS...---...;;;XXXuuuĿ]]]\\\ZZZTTTMMMFFFLLLbbbmmm\\\YYYccciiimmmwww}}}gggPPPAAADDDGGGPPPUUU[[[QQQ>>>111;;;QQQ\\\eee\\\TTTEEE888000------+++555OOOsssrrriiixxxů{{{tttaaaLLL:::777333UUUtttyyySSS111///000444PPPlllaaaaaa^^^WWWOOOHHHNNNdddkkk[[[ZZZeeekkkmmmnnnvvv~~~wwwiiiXXXKKK@@@666GGGOOOdddSSSEEE111999III[[[___dddPPPCCC222000111///---)))<<>>===RRRfff[[[EEE///777EEEXXXccckkkZZZEEE333///111000---(((111KKKtttxxx}}}}}}|||jjjZZZJJJ>>>999<<<;;;YYYppp{{{ZZZ===111111222HHH```}}}bbb``````]]]YYYPPPTTThhhkkk[[[\\\dddjjjhhh]]]YYYXXXeeerrrfffRRRJJJXXXKKKDDD111;;;DDD]
]]kkkqqqYYYBBB222222000///+++)))%%%888ZZZ~~~}}}{{{uuuqqqwww{{{zzzzzz{{{}}}rrr]]]EEE888666888999[[[mmm{{{XXXBBB222333333>>>XXXtttaaa______^^^ZZZQQQUUUiiijjjXXXXXX```ffffffaaa[[[TTTRRRkkkeeeOOOGGGGGG@@@>>>===GGG[[[jjjgggOOO888///777555777,,,)))&&&///HHHppp|||ssshhhbbb^^^aaafffrrrvvvzzzzzzjjjjjjjjjBBB111444444555YYYhhhtttOOO???333333333666NNNjjj```^^^^^^]]]VVVNNNRRRjjjhhhUUUUUU]]]cccdddfffggg___QQQhhhZZZEEE;;;DDDHHHMMMFFFWWW]]]\\\CCC000,,,<<<;;;;;;,,,***...,,,<<<[[[jjjNNNIIIGGGJJJMMMqqqmmmggguuurrrOOOSSStttjjj@@@///111///...XXXeeepppHHH>>>555555333222FFFdddbbbaaa___]]]XXXQQQVVVlllhhhUUUSSS\\\bbbdddeeebbb___WWWqqqwwwVVV???>>>IIIQQQLLLRRRRRRUUU@@@111)))999:::999222333999000444IIIjjjrrrNNN;;;999<<<;;;WWWttteee[[[cccyyy®TTTJJJ___gggWWW<<<222222///---XXXhhhrrrIII>>>555444333444>>>[[[wwwaaabbbaaa```[[[UUUZZZmmmiiiWWWTTT\\\bbbdddeeebbb```]]]mmmyyy[[[PPPIIIPPPLLLLLLOOORRREEE<<>>:::KKKUUUXXXLLLWWWjjjooo```___XXXGGG;;;888555///,,,YYYkkkrrrIII===777444111333777SSSnnnddddddcccbbb```\\\aaaqqqjjjWWWUUU]]]cccdddeeebbb```^^^ddduuueee[[[OOOKKKIIIHHHMMMPPPEEECCC222555---888===???333...---000DDDfffrrr^^^VVVQQQXXXRRRSSSJJJZZZhhh|||ttt___QQQPPPMMMHHH:::000,,,XXXkkknnnFFF888555222000444555LLLeeecccdddddddddbbb\\\aaarrrhhhTTTSSS[[[bbbdddfffcccaaa```eeejjjppp}}}xxxccckkkaaaVVVCCCFFFIIIKKKCCCCCC555444///???===;;;++++++,,,***666TTTzzzyyypppxxxmmmdddTTTZZZjjjpppcccWWWMMM:::000+++VVVbbbccc???999:::555111333666FFF```{{{eeedddccccccbbb]]]cccttteeeSSSSSS\\\bbbccceeebbbbbbbbbfffjjjnnnyyykkkIIIZZZgggiiiQQQLLLIIIEEEFFF>>>888...111999666444***+++//////111@@@ccc{{{zzzrrrhhhlll~~~qqqUUU888---///XXX___]]]<<<;;;;;;888444444555===XXXsssccccccccccccbbb^^^cccuuucccRRRQQQ\\\bbbcccdddccceeefffiiimmmpppwwwyyysssbbbggg{{{xxxiiiVVVOOO>>>FFF<<<===333:::===888333------222333444777KKKfff~~~|||ƺXXX777---999```ccc[[[;;;======:::555333222555LLLjjjddddddddddddcccaaaeeevvvfffUUURRR]]]bbbddddddfffgggiiijjjnnnpppuuuuuu}}}yyyppplll|||}}}zzzcccVVV;;;EEE;;;===111888???<<<777///...444555===222???QQQttt;ccc:::...EEEjjjlllWWW88888
8999999666222111///AAA```|||ddddddccccccaaa^^^dddtttiiiWWWTTT]]]bbbdddfffhhhhhhhhhiiinnnqqqttttttrrrwwwvvv}}}|||qqq___CCCBBB===;;;000777AAABBB<<<222444666;;;@@@888666>>>```пccc===333TTTqqqlllLLL444555888999777222000000999WWWttthhhfffeeebbbaaa]]]cccsssiiiWWWTTT]]]bbbfffggghhhgggfffggglllooorrrqqqqqqppp{{{}}}~~~xxxzzz}}}wwweeeQQQGGG@@@888///111;;;===;;;444444444<<>>gggrrreee???222333555:::<<<999333///---666QQQoooeeefffeeedddccc```eeeuuueeeSSSQQQZZZ___```bbbaaabbbcccfffqqqtttxxxpppjjjuuukkkqqquuuvvvnnniiiaaaPPPFFF;;;:::333///666888???AAA===>>>LLLIII>>>''':::VVVyyy˻ccc===333JJJqqqxxxeee@@@222111444999;;;888333...,,,///EEEccc}}}gggfffdddcccccc___dddsssdddRRRRRRYYY\\\^^^aaaaaabbbdddhhhjjjlllooopppiiiġooouuuuuuyyywwwxxxsss```PPPBBB???777111888>>>CCCBBB:::CCCGGGIII999+++000AAA\\\rrr|||ƳZZZ???<<<]]]|||eee???...///333777;;;999666///,,,--->>>\\\tttkkkiiigggcccccc___fffuuubbbRRRQQQYYY\\\\\\^^^___aaabbbeeehhhkkknnnllleeeϪvvvxxxvvvwwwxxx{{{{{{mmmbbbSSSLLL@@@;;;AAACCCBBB===;;;FFFGGGFFF777///+++333BBBVVVbbbwww©[[[KKKPPPggggggAAA+++---000555:::999888000///,,,777OOOhhhyyyllljjjhhhdddbbb^^^ggguuu```QQQOOOWWW[[[]]]^^^^^^aaabbbdddgggjjjnnnkkkddḓzzz{{{yyyyyyxxxyyywwwsssrrriii\\\JJJCCCHHHGGGAAA888===CCCGGG@@@999444666666@@@FFF[[[lllĺsssZZZUUUdddpppsssLLL++++++...444999999777000///---444FFFaaauuujjjkkkkkkgggdddaaajjjxxxaaaSSSQQQZZZ\\\___```___aaaaaabbbeeehhhkkkeeennnũ}}}{{{yyyzzzxxxxxxssstttyyyssshhhWWWOOOOOOJJJBBB666AAAAAAHHH<<<===999::::::EEEDDDZZZbbb{{{eee[[[^^^rrrvvvnnnFFF555---222555666555222000...222>>>XXXqqqiiikkkkkkiiieeecccjjj{{{cccRRRPPPXXX\\\```aaa___```___aaaeeehhhhhhcccyyyyyyyyy{{{{{{zzzwwwrrrqqqxxxzzzrrreeeUUUQQQHHHCCC:::BBB@@@BBB;;;CCCDDDBBB===OOOLLLbbbYYYuuu{{{[[[VVVaaammmvvvvvvUUU<<>>777666CCCKKKDDD:::LLLNNNYYYEEEbbbqqqggg___bbbmmmwww]]]===---///333444222///000333@@@\\\wwwjjjhhhhhhgggfffhhhooo}}}cccQQQRRRZZZ^^^]]]]]]___```______cccccc^^^vvvzzz}}}}}}{{{xxxxxxvvvwwwzzzxxxzzzvvvooo\\\LLLCCCBBB>>>666333BBBQQQQQQCCCKKKOOOWWW???TTTXXX{{{vvviiihhhwwwYYY999---111333222......///666
QQQpppkkkhhhgggffffffgggooo~~~cccRRRSSS[[[^^^^^^^^^```aaa```___``````^^^ŵ{{{zzz{{{{{{zzz{{{zzzyyy~~~ppp```WWWMMMFFF>>>:::???MMMVVVTTTPPPRRRQQQGGGIIIHHHXXXddd~~~XXX<<>>...000@@@^^^qqqgggfffqqq^^^DDD333///111444555333HHHhhhccccccbbbbbbdddjjjuuu___NNNQQQYYY^^^aaa```bbbaaaddddddbbb[[[IJ|||yyyppptttuuu{{{~~~~~~}}}zzznnnaaaSSSZZZkkk~~~wwwmmm^^^YYYNNNDDD999//////>>>cccĹnnnaaadddwwwTTT111///111222222///???^^^bbbbbb```aaabbbhhhsss}}}___NNNPPPXXX]]]`````````___bbbcccbbb[[[˿|||rrruuuvvvwww|||~~~yyyqqqddd\\\iiillldddkkkiii^^^UUURRRPPPCCC\\\zzzȽzzzfffeeezzzxxxCCC---000000000...777RRRvvv````````````bbbgggrrraaaPPPPPPYYY^^^``````aaa```aaabbb___ZZZ~~~¸{{{sssyyy|||{{{{{{pppiiirrr}}}|||yyyyyyxxxhhhooo¿ɹoooqqq}}}PPP---///000//////444GGGhhhaaa______^^^```dddooo|||```OOOOOOYYY___aaaaaaaaa```___aaa```ZZZ}}}yyysssxxxʼ|||nnn>>>///222000000111>>>ZZZ|||```^^^___^^^___bbblll{{{___OOOPPP[[[aaabbbcccbbbccc```bbbaaa[[[yyynnnnnnwwwƳ~~~RRR...222000000///777KKKmmmbbb______^^^___```jjj~~~{{{```RRRSSS^^^aaaccccccaaabbbaaaeeeccc^^^ttthhhgggooo~~~˾ccc000333222000...333AAA___|||___^^^`````````___ggg{{{{{{aaaTTTTTT]]]aaadddcccbbbbbbcccfffeeebbbqqqddddddmmmwwwźrrr???111222///...///666IIIiiiaaa```aaaaaaaaabbbjjj|||{{{bbbTTTTTT[[[```bbbaaa___```bbbddddddaaa~~~rrrfffccchhhqqqyyy~~~SSS000111/////////333:::WWWtttbbbcccdddcccbbbccckkk|||yyy```RRRTTTZZZ___```aaa___```aaaccceeeccctttnnniiihhhlllrrryyyzzzɽΙbbb---...///......111111EEEbbbccccccccccccbbbdddlll}}}zzzaaaRRRTTTYYY]]]^^^```___```aaadddfffeee~~~tttjjjhhhhhhhhhkkkrrrxxxyyy|||֩ooo777///000000...000///:::SSSvvvaaabbbbbbdddbbbdddkkk}}}{{{cccUUUUUUYYY]]]```bbbbbbbbbbbbgggjjjjjjuuupppeeeccceeekkknnnrrrxxxyyy{{{}}}׹JJJ222333000.........444BBBddd```aaaaaacccbbbdddjjj{{{{{{eeeVVVVVVYYY^^^aaaddddddcccdddhhhkkklllkkk~~~kkkbbbcccggglllnnnqqquuuxxxxxxxxx|||З___222444222111...---111666LLLlll___```bbbdddbbbccckkk}}}zzzcccVVVWWW[[[```dddfffeeedddeeehhhlllnnnjjjyyyiiiaaaeeeiiipppssssssuuuwww|||yyyzzz|||±ؠhhh111555555222---...111111;;;YYY}}}______aaabbbaaa```jjj{{{xxxaaaVV
VVVV[[[```dddffffffeeeeeeggglllooolllyyygggcccgggjjjooowwwvvvwwwwww~~~|||xxx~~~ƿĵڢiii000666666333......333333111FFFjjj^^^^^^^^^___^^^]]]iii|||xxx___RRRRRRZZZaaadddeeeeeedddeeefffkkknnnlll~~~hhheeeiiikkkooovvvvvvvvvvvv|||{{{zzzų~~~ooolll}}}ڡggg///777777222------222444333===XXX|||]]]\\\\\\]]]\\\[[[gggyyyxxx___PPPPPPWWW^^^bbbcccddddddfffggglllnnnmmmmmmiiijjjlllnnntttuuuxxxyyyzzz}}}~~~~~~϶eeeNNNLLLiii٠eee...444555111///...000333444555DDDfff]]]\\\[[[[[[ZZZYYYfffyyyxxx^^^OOOOOOWWW]]]___aaabbbcccfffhhhlllllllllqqqkkkhhhlllnnnooorrrrrrvvvyyy{{{~~~Ĝ|||wwwlllJJJ333;;;^^^֙\\\+++000333333222000222444555555777PPPqqq\\\[[[ZZZZZZXXXXXXeeeyyyxxx```PPPPPPVVV\\\___bbbccccccgggjjjmmmmmmmmmlllooohhhkkkmmmlllnnnmmmsssyyy~~~{{{׳gggvvvsssRRR888)))666ZZZŇKKK***///333555777555555333444444222???YYY|||\\\[[[ZZZZZZVVVVVVddd{{{xxx```PPPQQQWWW]]]aaabbbbbbaaaeeejjjmmmooooooooottthhhkkklllkkkmmmnnnqqqvvv||||||{{{Λppp^^^yyyvvvUUU;;;+++)));;;eee­xxxCCC)))---333555888666777555555555333555CCCccc~~~ZZZZZZZZZZZZVVVXXXeee|||vvv___PPPRRRWWW___bbbdddbbb```dddjjjmmmpppqqqrrryyyjjjmmmnnnmmmmmmlllnnnsssxxx}}}ڻaaaaaazzzwwwWWW;;;,,,,,,333TTT{{{wwwKKK111+++000333777666666333333333222000444KKKjjjYYYZZZZZZYYYVVVXXXfff|||www^^^QQQRRRZZZ```ccccccbbbaaaccchhhlllooorrrsssrrrnnnooooooqqqqqqpppnnnqqquuu͛lllVVVfff{{{tttTTT999+++111DDD```xxxbbb>>>***---222444555444444222111111///000:::VVVxxxXXXXXXXXXYYYXXXYYYggg|||vvv___TTTTTT]]]bbbcccbbb```bbbeeegggjjjlllrrrtttuuu~~~rrrqqqqqqtttsssqqqooorrrwww}}}̬wwwYYYVVVjjj}}}rrrRRR666,,,:::VVVtttyyyTTT444)))...333888555333000000111111222444EEEbbb~~~___[[[XXXWWWVVVVVVfff|||vvv___TTTUUU```ffffffdddcccccceeefffjjjlllsssuuuwwwwwwzzzyyytttpppooosssttttttrrrppprrruuuzzz{{{Ȭ^^^OOOZZZmmmqqqOOO777333IIIgggkkkFFF///---222888777555111000111000111222;;;PPPkkkjjj```XXXWWWWWWXXXfff|||vvv```TTTVVV___ffffffdddcccccceeegggkkkmmmrrrtttxxxxxxzzzxxxtttooonnnpppqqqsssuuutttsssrrrvvvvvvyyyŨ{{{aaaSSSVVV^^^ooorrrOOO:::>>>ZZZuuu___@@@333666777777666111000000000///000444AAAUUUppp|||lll\\\VVVUUUXXXeee|||uuu^^^Q
QQSSS\\\ccceeeffffffeeeeeeggglllooorrrtttwwwxxxyyyxxxtttqqqnnnooonnnqqqrrrppppppooottttttxxx~~~Ҳ^^^WWWZZZ]]]cccqqqoooOOO@@@KKKjjjuuuUUU:::333222333666333//////...//////444===JJJ]]]uuuxxxcccXXXUUUZZZfff{{{uuu]]]PPPRRRZZZ```bbbeeeeeeeeeeeehhhkkknnnrrrtttwwwyyyyyyyyyvvvuuuqqqooommmppprrrtttuuuuuuvvvwwwxxxėkkkWWW\\\^^^___dddpppnnnQQQIIIYYYwwwjjjIII444...///444444000......///...111777@@@MMMbbb{{{nnn^^^UUUZZZggg|||vvv]]]MMMOOOXXX___bbbddddddddddddhhhkkknnnqqqvvvxxx{{{zzz{{{wwwyyyvvvsssnnnmmmooorrrvvvyyyyyyyyyyyy~~~ʧ|||ccc___aaabbbbbbiiisssoooYYYZZZkkk|||ZZZ999+++...111333000///...000///111444;;;DDDOOOcccyyy~~~hhhXXXWWWeee{{{www]]]MMMOOOZZZ```bbbbbbbbbcccfffiiilllmmmooouuuwww|||yyyyyyvvvxxx|||zzzuuunnnnnnrrrwwwzzzzzz{{{|||̰lllbbbddddddeeedddiiisssttthhhnnn~~~sssOOO555---...000000111/////////111000555===@@@HHH___{{{ttt^^^XXXeee}}}yyy]]]OOOPPP[[[aaacccaaaaaaaaafffhhhlllnnnpppuuuxxx{{{yyyyyywwwxxx|||xxxmmmoooqqqvvvxxx{{{yyy|||~~~ǿrrreeehhhhhhiiihhhgggiiittt|||yyyjjjGGG000,,,---...111000......111222333999888888CCCUUUgggYYYbbb{{{www\\\OOOQQQ\\\aaaaaa___```aaafffhhhlllnnnqqqvvvwwwyyyyyyzzzzzzxxx~~~|||rrrssstttwwwxxx{{{{{{½ssskkkmmmmmmmmmmmmkkkggghhhssslllGGG000+++...000000...///111333222333222111333555uuubbbdddzzzxxx]]]OOOPPPZZZ^^^``````aaaaaadddfffkkkooorrruuuvvvyyyzzz||||||}}}~~~|||wwwuuuuuuvvvwwwxxx{{{zzz}}}~~~º~~~nnnjjjnnnrrrrrrqqqpppkkkfffhhhsssqqqGGG---+++---//////000000222000000000000...(((lllhhhyyyyyy^^^OOOPPPXXX\\\___```aaabbbdddgggmmmrrruuuuuuvvvwww{{{|||~~~~~~~~~}}}yyy~~~|||zzz{{{~~~}}}sssoooqqqrrrtttrrrrrrppplllhhhiiitttħmmm@@@******...111333111000//////...000///...yyysss||||||bbbSSSRRRYYY]]]aaabbbaaabbbdddjjjnnnrrruuutttwwwyyy{{{{{{|||~~~~~~|||{{{~~~|||}}}}}}ttttttttttttuuutttssspppnnnkkkjjjjjjtttʎRRR***+++///222444333222111222000000///111}}}zzzbbbVVVWWW]]]aaaddddddccccccffflllqqqtttuuuuuuwwwzzz{{{{{{{{{}}}~~~|||}}}~~~|||zzzxxxxxxwwwxxxwwwuuusssqqqpppnnnmmmmmmvvvҝbbb222,,,...000444333333111333222222///111yyydddYYY[[[```ccceeeeeedddfffhhhmmmoooqqqrrruuuzzz|||{{{zzz{{
{~~~~~~~~~}}}|||~~~}}}}}}}}}|||zzzzzzyyywwwuuusssqqqooommmooovvvgggDDD333......111222444222333111///...///xxxdddZZZ]]]bbbeeeeeeffffffggghhhlllnnnpppssswww{{{{{{}}}||||||}}}~~~}}}~~~~~~}}}||||||}}}zzz~~~~~~}}}}}}zzz{{{zzzyyyxxxwwwuuurrrnnnpppvvvɧlllZZZBBB222+++...000333333444222///...///yyyeee[[[]]]aaaeeeggggggfffhhhiiikkklllnnntttwww{{{{{{}}}|||}}}}}}}}}}}}}}}||||||}}}|||{{{}}}}}}~~~{{{|||{{{zzzxxxvvvssspppmmmpppxxxָtttppplllYYYBBB111---111555666555222///...000yyydddZZZ\\\aaaeeeggggggfffffffffiiilllpppuuuxxxyyyzzz}}}}}}~~~}}}|||}}}~~~}}}{{{~~~~~~~~~}}}|||~~~~~~~~~{{{{{{}}}~~~{{{{{{|||~~~{{{zzzzzzyyyxxxvvvsssooonnnqqqzzzɢ}}}jjjqqqvvvooo[[[@@@222///555777999666111///...xxxdddZZZ\\\aaadddggggggfffffffffhhhlllrrrwwwyyyyyyzzz}}}~~~~~~|||~~~~~~~~~~~~~~~}}}~~~}}}}}}||||||zzzxxxzzzyyyxxxvvvxxx{{{}}}~~~}}}{{{|||~~~}}}{{{{{{zzzyyyxxxvvvsssooonnnrrr}}}Էtttnnnsss{{{|||ttt[[[BBB333222555666444111111000yyyeee[[[]]]cccfffhhhhhhgggeeedddhhhlllsssvvvzzzzzz|||~~~~~~~~~~~~~~~}}}|||zzzyyywwwzzzzzzzzzyyyzzz}}}~~~|||}}}~~~}}}|||zzzzzzxxxxxxvvvtttpppnnnsss|||Ý}}}ooovvvxxx|||}}}}}}qqqZZZ@@@444222555333333222111yyyfff]]]___cccggghhhhhhggggggfffjjjlllrrrvvvzzz|||{{{}}}}}}~~~~~~~~~|||zzzxxxxxxzzz{{{{{{zzz{{{~~~~~~~~~~~~}}}}}}|||zzzzzzxxxxxxvvvrrrpppttt~~~ϰqqqooowwwzzzzzzyyy}}}}}}pppUUU===///...000111222222{{{fff^^^```eeehhhiiihhhgggggggggjjjmmmqqqtttyyy||||||~~~|||}}}zzzzzzyyy{{{{{{|||||||||~~~|||{{{zzz{{{}}}}}}}}}zzz{{{zzzyyyuuuqqqpppsss}}}վxxxnnnrrrwwwyyyxxxvvvwww}}}|||lllSSS;;;111...000111333zzzggg^^^___dddfffgggffffffggghhhkkkooosssuuuxxx||||||}}}{{{~~~~~~}}}~~~}}}~~~}}}}}}|||}}}~~~{{{zzz|||}}}}}}~~~}}}xxxpppooosss~~~˪sssrrrsssuuuwwwwwwvvvyyy|||yyykkkQQQ<<<111000222333{{{iii___```eeehhhfffffffffggggggiiinnnsssuuuxxxzzz|||||||||~~~~~~~~~}}}{{{|||}}}}}}yyyuuunnnlllrrr~~~Ӻzzzrrrtttuuuvvvvvvuuuvvvyyy|||~~~|||lllSSS>>>333111111{{{kkk___```dddggghhhggggggggghhhkkkppptttuuuxxxyyy{{{{{{}}}|||}}}~~~{{{wwwssslllllluuuǥrrrrrrssstttuuuvvvvvvwwwzzz|||}}}~~~yyyiiiRRR===222---zzzggg]]]^^^dddgggiiihhhhhh
ggggggkkkoootttvvvyyyzzz||||||~~~}}}~~~yyyuuuqqqoookkkooozzzҷtttooopppsssuuuwwwwwwwwwzzz{{{||||||}}}{{{|||vvvgggOOO;;;000zzzfff^^^___dddfffiiiiiijjjhhhiiiooossswwwwwwzzz|||~~~~~~~~~|||~~~|||}}}{{{xxxuuupppmmmkkkrrràmmmnnnqqqtttvvvvvvvvvxxx{{{~~~}}}~~~{{{{{{yyyyyyqqqfffYYYQQQ|||fff]]]___ffffffhhhhhhjjjiiiiiilllqqquuuwww{{{~~~~~~~~~}}}}}}}}}yyyvvvtttqqqmmmlllvvvҵppplllqqqtttwwwyyyxxxwwwwwwzzz|||zzz|||{{{{{{xxxwwwvvvvvvxxxzzz~~~jjj```ccchhhgggggghhhllljjjjjjlllpppsssvvvyyy~~~~~~~~~~~~~~~|||yyywwwtttqqqlllmmmwwwЪgggllltttyyyzzzzzzwwwwwwxxxyyyyyywww{{{yyyzzzxxxvvvvvv|||Release_v0.3/kernels/compiler_box_blur_image.cl000066400000000000000000000011451223142177000220370ustar00rootroot00000000000000__kernel void compiler_box_blur_image(__read_only image2d_t src, __write_only image2d_t dst) { const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; const int2 coord = (int2)(get_global_id(0), get_global_id(1)); int2 offset; float4 sum = 0; for (offset.y = -1; offset.y <= 1; offset.y++) { for (offset.x = -1; offset.x <= 1; offset.x++) { sum += read_imagef(src, sampler, coord + offset); } } write_imagef(dst, coord, (1.0f/9.0f)*sum); } 
Release_v0.3/kernels/compiler_box_blur_ref.bmp000066400000000000000000001400661223142177000217170ustar00rootroot00000000000000BM66(NNNOOO\\\uuuqqqjjjpppIII:::000111000000111444222555888===999333000...222555555:::TTT~~~TTTFFF---...+++)))'''+++999LLL^^^kkkppp^^^MMMMMMfff}}}ҩbbbfffddd^^^VVVMMMCCC555---LLLŷXXXEEECCCGGGOOOZZZ```XXXPPPQQQ```hhheeeSSSCCC999<<<===NNNooonnnfffqqqNNNAAA222......000000444222777999>>>:::555000///...///333CCCbbb}}}sssPPPDDD...000...***((()))444FFFYYYhhhppphhhUUULLLYYYrrrśuuu```bbb```ZZZRRRHHH>>>555444PPPmmmOOOEEEJJJOOOWWW^^^[[[VVVSSS]]]dddcccUUUEEE:::***+++???hhhkkkcccwwwYYYHHH888............444333999:::>>>888333///000***222>>>[[[kkkssseee```KKKDDD333444///+++***(((...<<>>:::333000---+++555MMMccciii\\\OOOGGGDDDEEE>>>:::///---,,,)))+++666FFF[[[iiiuuuqqq\\\RRRYYYtttժ~~~NNNNNNRRRUUUWWWXXXZZZ\\\___jjjzzzŢzzz\\\TTTTTTTTTWWW\\\___]]]ZZZ______ZZZIII>>>"""###<<>>888111888HHHaaaqqq{{{ooobbb\\\jjj|||ңsss̫rrrhhhaaa^^^\\\YYYYYY^^^___```___^^^[[[###)))LLL}}}nnnVVV___uuuQQQ///---...---444555777:::;;;;;;333666222222---;;;@@@DDDAAASSSiiirrrwwwdddcccLLLMMMIIIMMMOOOLLLGGG<<<111444AAAXXXkkkyyyuuubbb\\\bbbyyyȒ{{{nnnҷyyymmmdddaaa___^^^[[[[[[___bbb```^^^ZZZ!!!,,,RRRjjjRRR___qqqOOO222///------222777999@@@===???777888555///333::::::999BBBaaavvv~~~uuubbbggg[[[YYYKKKLLLRRRRRRLLL???222222<<>>CCC<<<666222111666???UUUhhhuuu^^^UUUUUUsssӖ[[[CCCgggӿvvv___^^^dddkkkjjjaaa]]]```ddddddccc$$$444___^^^MMMbbb}}}[[[???111...---...///???]]]\\\NNN///333111///...555BBBXXXXXXVVV___vvvhhhttt{{{sssRRR<<>>000444777666333555333444222888KKK\\\VVVMMMQQQkkk}}}ٵzzz???...CCC```~~~|||ϯ___WWW___cccgggeeeaaa______bbbJJJ```~~~YYYNNNfffhhh444(((***333888:::444>>>???BBB222<<<888LLLJJJOOOIIILLLTTT```nnnxxx~~~mmmjjj```VVV>>>---333888777333666555666333777JJJ\\\VVVMMMPPPiii{{{ÍUUU000444@@@\\\www{{{}}}}}}һfffUUUXXX```dddhhhgggccc___```dddsssoooQQQMMMhhhppp:::...---444999<<<888<<<>>>DDD???DDD@@@HHHHHHHHHGGGMMMWWWhhhuuuuuuqqqppp|||bbbHHH000555888777333444444444444888IIIUUUNNNHHHOOOgggyyy|||ãnnnIII:::===AAA[[[
ttt~~~yyy}}}{{{|||ǥrrrUUUQQQ]]]```dddeeeddd^^^\\\|||zzz^^^IIIKKKjjjtttDDD444---///:::EEEKKKKKKIIIKKKDDDGGGAAAKKKHHHJJJHHHVVV^^^pppyyy{{{kkkgggeeennnXXXGGG444666888666444666666555444999GGGLLLEEEBBBPPPgggvvvyyy~~~¢tttRRR???@@@BBBEEE]]]vvv}}}vvvppptttvvvrrr͸]]]IIISSSXXX[[[]]]___[[[WWWxxxeeeNNN@@@JJJlll^^^EEE,,,+++888AAAIIINNNNNNWWWIIINNNEEERRRXXXXXXRRRVVV^^^qqq{{{}}}kkkbbbiiifffWWW>>>777555888666555666666444666999DDDBBB>>>@@@SSShhhuuuxxx}}}âpppWWWKKKKKKGGGDDDFFF^^^wwwyyypppgggmmmrrrmmmxxxĞoooOOOJJJNNNQQQTTT[[[YYYUUUttt```OOO???<<>>;;;AAAVVVkkkuuuwwwzzzȹjjjJJJEEENNNOOOGGGEEEGGGeee~~~|||uuunnnfff___dddkkkgggrrr̺gggKKKDDDHHHLLLUUUVVVUUUmmmfffYYYIII@@@:::???QQQrrr~~~wwwjjjXXX:::222999;;;555EEEKKKVVVCCCPPPWWWdddfffddd___\\\___nnnsss|||ttt___OOOKKK??????444888555666333222000444666<<<;;;===DDDXXXjjjuuuzzz}}}ƹhhhEEE:::KKKVVVTTTKKKIIIIIIhhh|||xxxvvvqqqmmmiiihhhggglllpppkkkvvv˳gggLLLCCCCCCMMMSSSXXXRRRKKKAAA777666999EEEYYYxxxtttkkkTTT===666:::@@@777@@@AAAGGGDDD]]]gggcccSSSUUUcccgggqqquuusss{{{xxxMMM<<<>>>???@@@333666333555222222222666777:::888;;;DDDWWWjjjwww}}}rrrUUU???000777MMMYYYWWWMMMIIIJJJhhh||||||wwwtttqqqoooooopppqqqsssvvvwwwwwwrrr~~~ȱeeeIII:::===HHHRRR999555111---333>>>PPPeeeyyyrrrXXX???;;;;;;DDD888===999777GGG```mmm___WWW\\\pppqqq}}}sssmmmqqqoooccc???333<<<======444666555333000111333777999;;;888>>>DDDTTTbbbpppzzzqqqVVVCCC555111111666MMMYYYYYYOOOJJJJJJcccuuuuuurrrssstttvvvyyy}}}zzzŲ```>>>222:::CCC111///---+++444CCCWWWlll|||yyypppWWW;;;;;;;;;AAA<<<;;;555000HHH^^^dddSSSTTTYYYooopppyyyjjj]]]KKK555000999999999222333333111000000444888;;;;;;999>>>AAAHHHPPP\\\eeelllttt|||vvvSSSCCC666555444222000444HHHTTTWWWNNNGGGDDDZZZyyyyyyxxxyyy|||}}}ŭgggJJJ===888......---...888III^^^sssmmmccc___VVV@@@@@@@@@FFF===<<<666333MMMXXXZZZNNNaaajjjuuusssxxxoooZZZJJJ777000222777777999444666555333111111555999:::999:::===<<<>>>CCCQQQ\\\hhhsss|||qqqTTT666888555777666666333555IIIRRRVVVMMMFFFAAAUUUvvv~~~{{{ð|||iii]]]---------000===NNNcccuuukkkWWWVVVTTTKKKIIIFFFFFF<<<;;;777777OOOQQQVVVSSSkkkrrrssspppwwwuuuxxx
^^^III111111333666555888444444333222111333666:::<<<999<<<;;;<<<<<>>777555777???UUUeeeppprrrwwwlllnnndddllleeewwwEEE000///333444555111111111222333888666:::<<<===AAACCCNNNXXXcccooovvv~~~cccCCC...///222777999666333BBBKKKQQQFFFCCC>>>TTTrrr~~~666444111222AAARRRhhhxxxhhhVVVYYYaaaXXXJJJGGGBBB;;;444444DDD[[[nnntttmmmkkkgggjjjhhhmmmjjjbbb@@@000444333222000000222333666999888888999>>>EEENNN[[[fffooovvv|||uuuVVV555...---333333333///@@@GGGPPPFFFEEE???UUUppp:::888666666???MMMbbbvvvjjjVVV\\\fff___JJJNNNFFF@@@888===SSSkkkxxxyyyeeegggmmmsssoooqqqttt~~~XXX===555444222111111333444999:::<<<;;;>>>BBBLLLXXXeeemmmsssxxx~~~hhhEEE333...222444444000AAAGGGRRRDDDAAA999TTTooo999999777:::>>>JJJ[[[vvvqqqYYY[[[fffeeeQQQZZZOOOGGG;;;BBBRRRhhhtttuuu]]][[[dddjjjuuu|||oooKKK333222///111000333444;;;<<<>>>>>>AAAGGGQQQbbbmmmtttwww{{{}}}[[[===///222222444111FFFKKKVVVFFF???666SSSooo999:::;;;===???IIIYYYvvv{{{aaaaaagggppp^^^ZZZFFFBBB<<>>AAAIII[[[xxxzzziiiaaaiiisssiii___DDDAAA;;;???AAA]]]fffrrr]]]\\\VVVQQQYYYnnn~~~zzzkkkRRR===///333222555444<<<:::???AAAIIISSS___rrryyy}}}~~~wwwlllmmmsssuuuyyy~~~xxxXXX;;;000///333555GGGLLLRRRGGG>>>444RRRppp<<<===@@@AAACCCHHH[[[wwwwwwgggdddiiiwwwkkkYYYBBBAAABBB<<<===VVVaaakkkWWWUUURRRQQQZZZbbbnnntttaaaFFF000333222222333<<<:::AAAEEEQQQZZZeeexxx|||wwwoooqqqxxx{{{~~~dddGGG111///333777@@@GGGKKKFFF<<<111PPPppp@@@???BBBAAADDDGGG\\\xxxmmm]]]^^^iiiwwwfff\\\KKKKKKIII<<<===NNN[[[```UUULLLRRRUUUhhh]]]```kkkyyyjjjZZZ@@@333222000333<<<888BBBJJJXXX```hhhyyy|||zzzuuuzzzsssVVV;;;000333777>>>EEEFFFEEE;;;222QQQoooCCCDDDDDDDDDCCCGGG\\\wwwmmmWWWYYYhhhwwwfffRRRNNNKKKPPP===@@@IIIRRRVVVMMMLLLWWWgggyyybbbTTTMMMlllppp]]]JJJ888111///444:::777@@@QQQbbbfffkkkzzz~~~~~~bbbEEE///111444;;;CCCDDDBBB777222TTTnnnFFFGGGGGGEEEBBBDDDXXXtttmmmXXXWWWgggxxxjjjXXXJJJJJJLLL@@@@@@@@@HHHNNNTTTZZZmmmvvv}}}aaaPPPCCCLLLhhhyyylll]]]GGG222222999;;;888AAARRRdddhhhnnnyyy}}}pppSSS666000333;;;AAACCC@@@777666WWWlllzzzIIIKKKKKKHHHCCCFFFXXXtttnnn[[[VVVeeessslllVVVFFFCCCQQQGGGIII???IIIQQQZZZhhhvvvttthhh[[[RRRCCC:::NNNnnn|||uuulll___BBB
111888999555BBBQQQeeeiiirrrzzz|||}}}]]]>>>///222888@@@CCC???555<<<___ppptttqqqMMMNNNNNNKKKDDDFFFVVVsssnnnZZZUUU^^^mmmiiiYYYGGGDDDLLLLLLLLLFFFLLLOOO```iiiyyygggSSSSSSUUUOOO777???UUUkkkzzzvvvyyy|||qqqQQQ444777999777FFFQQQdddgggtttyyy{{{nnnNNN666000555<<>>---111:::CCC@@@555JJJiii|||aaa```qqqRRRRRRRRRLLLEEE===GGGaaarrrccc\\\aaaiiicccTTTJJJQQQPPPLLLQQQYYYYYYOOOccc]]]]]]===<<>>HHH[[[^^^nnnwww{{{wwwwwwoooxxxrrrTTT666***555AAA@@@555PPPqqq{{{UUUEEE]]]QQQOOOMMMHHH@@@555;;;TTTyyypppaaa```gggppprrrdddYYYYYY]]]TTTOOOIIISSSPPPiiikkk\\\???///------///777NNN[[[]]]DDD111777TTTssswwwIII''';;;DDDXXX[[[nnnyyy~~~aaaBBB---444@@@<<<666UUUxxxyyyNNN999UUUwwwMMMNNNMMMJJJAAA777<<>>XXX}}}kkkUUUZZZkkk{{{nnnaaaWWW```[[[MMMKKKJJJZZZ^^^]]]FFF<<<000...)))999AAAMMMTTTkkkccchhhFFFCCCFFFiiiPPP:::>>>SSS\\\nnn|||~~~}}}¶rrrUUU<<<555@@@;;;>>>YYY{{{nnnJJJ111JJJeeeKKKKKKHHHGGGAAA:::@@@[[[kkkVVVZZZiiiuuu}}}jjjdddWWW```WWWQQQJJJDDDUUU___```CCC999222111+++:::@@@@@@HHHdddrrruuuQQQ===777RRRrrrIII:::OOO\\\ppp|||³vvv\\\BBB333>>>;;;GGG\\\|||hhhJJJ///AAA\\\zzzKKKJJJGGGFFFBBB<<>>;;;PPPaaa|||aaaHHH222<<>>111555KKKhhhUUURRRQQQNNNKKKDDDIII___lllYYYWWWdddmmmtttsssooojjjhhhXXXMMMEEEMMM[[[___aaaPPP???111...888>>>IIIJJJKKKWWW\\\RRR888******777QQQuuummmRRRZZZqqq|||IJiiiMMM888999666UUUpppyyyTTT333///111DDDaaaZZZXXXTTTOOOJJJDDDIII```lllZZZXXXgggnnnrrrsssuuusssiii[[[LLLBBBHHHUUUbbb]]]TTT???555...:::IIIZZZ\\\WWWSSSMMM@@@111,,,,,,---???YYYpppeeennn}}}Ƴ}}}hhhQQQ;;;777222TTTtttxxxSSS...---...;;;XXXuuuĿ]]]\\\ZZZTTTMMMFFFLLLbbbmmm\\\YYYccciiimmmwww}}}gggPPPAAADDDGGGPPPUUU[[[QQQ>>>111;;;QQQ\\\eee\\\TTTEEE888000------+++555OOOsssrrriiixxxů{{{tttaaaLLL:::777333UUUtttyyySSS111///000444PPPlllaaaaaa^^^WWWOOOHHHNNNdddkkk[[[ZZZeeekkkmmmnnnvvv~~~wwwiiiXXXKKK@@@666GGGOOOdddSSSEEE111999III[[[___dddPPPCCC222000111///---)))<<>>===RRRfff[[[EEE///777EEEXXXccckkkZZZEEE333///111000---(((111KKKtttxxx}}}}}}|||jjjZZZJJJ>>>999<<<;;;YYYppp{{{ZZZ===111111222HHH```}}}bbb``````]]]YYYPPPTTThhhkkk[[[\\\dddjjjhhh]]]YYYXXXeeerrrfffRRRJJJXXXKKKDDD111;;;DDD]]]kkkq
qqYYYBBB222222000///+++)))%%%888ZZZ~~~}}}{{{uuuqqqwww{{{zzzzzz{{{}}}rrr]]]EEE888666888999[[[mmm{{{XXXBBB222333333>>>XXXtttaaa______^^^ZZZQQQUUUiiijjjXXXXXX```ffffffaaa[[[TTTRRRkkkeeeOOOGGGGGG@@@>>>===GGG[[[jjjgggOOO888///777555777,,,)))&&&///HHHppp|||ssshhhbbb^^^aaafffrrrvvvzzzzzzjjjjjjjjjBBB111444444555YYYhhhtttOOO???333333333666NNNjjj```^^^^^^]]]VVVNNNRRRjjjhhhUUUUUU]]]cccdddfffggg___QQQhhhZZZEEE;;;DDDHHHMMMFFFWWW]]]\\\CCC000,,,<<<;;;;;;,,,***...,,,<<<[[[jjjNNNIIIGGGJJJMMMqqqmmmggguuurrrOOOSSStttjjj@@@///111///...XXXeeepppHHH>>>555555333222FFFdddbbbaaa___]]]XXXQQQVVVlllhhhUUUSSS\\\bbbdddeeebbb___WWWqqqwwwVVV???>>>IIIQQQLLLRRRRRRUUU@@@111)))999:::999222333999000444IIIjjjrrrNNN;;;999<<<;;;WWWttteee[[[cccyyy®TTTJJJ___gggWWW<<<222222///---XXXhhhrrrIII>>>555444333444>>>[[[wwwaaabbbaaa```[[[UUUZZZmmmiiiWWWTTT\\\bbbdddeeebbb```]]]mmmyyy[[[PPPIIIPPPLLLLLLOOORRREEE<<>>:::KKKUUUXXXLLLWWWjjjooo```___XXXGGG;;;888555///,,,YYYkkkrrrIII===777444111333777SSSnnnddddddcccbbb```\\\aaaqqqjjjWWWUUU]]]cccdddeeebbb```^^^ddduuueee[[[OOOKKKIIIHHHMMMPPPEEECCC222555---888===???333...---000DDDfffrrr^^^VVVQQQXXXRRRSSSJJJZZZhhh|||ttt___QQQPPPMMMHHH:::000,,,XXXkkknnnFFF888555222000444555LLLeeecccdddddddddbbb\\\aaarrrhhhTTTSSS[[[bbbdddfffcccaaa```eeejjjppp}}}xxxccckkkaaaVVVCCCFFFIIIKKKCCCCCC555444///???===;;;++++++,,,***666TTTzzzyyypppxxxmmmdddTTTZZZjjjpppcccWWWMMM:::000+++VVVbbbccc???999:::555111333666FFF```{{{eeedddccccccbbb]]]cccttteeeSSSSSS\\\bbbccceeebbbbbbbbbfffjjjnnnyyykkkIIIZZZgggiiiQQQLLLIIIEEEFFF>>>888...111999666444***+++//////111@@@ccc{{{zzzrrrhhhlll~~~qqqUUU888---///XXX___]]]<<<;;;;;;888444444555===XXXsssccccccccccccbbb^^^cccuuucccRRRQQQ\\\bbbcccdddccceeefffiiimmmpppwwwyyysssbbbggg{{{xxxiiiVVVOOO>>>FFF<<<===333:::===888333------222333444777KKKfff~~~|||ƺXXX777---999```ccc[[[;;;======:::555333222555LLLjjjddddddddddddcccaaaeeevvvfffUUURRR]]]bbbddddddfffgggiiijjjnnnpppuuuuuu}}}yyyppplll|||}}}zzzcccVVV;;;EEE;;;===111888???<<<777///...444555===222???QQQttt;ccc:::...EEEjjjlllWWW88888899999
9666222111///AAA```|||ddddddccccccaaa^^^dddtttiiiWWWTTT]]]bbbdddfffhhhhhhhhhiiinnnqqqttttttrrrwwwvvv}}}|||qqq___CCCBBB===;;;000777AAABBB<<<222444666;;;@@@888666>>>```пccc===333TTTqqqlllLLL444555888999777222000000999WWWttthhhfffeeebbbaaa]]]cccsssiiiWWWTTT]]]bbbfffggghhhgggfffggglllooorrrqqqqqqppp{{{}}}~~~xxxzzz}}}wwweeeQQQGGG@@@888///111;;;===;;;444444444<<>>gggrrreee???222333555:::<<<999333///---666QQQoooeeefffeeedddccc```eeeuuueeeSSSQQQZZZ___```bbbaaabbbcccfffqqqtttxxxpppjjjuuukkkqqquuuvvvnnniiiaaaPPPFFF;;;:::333///666888???AAA===>>>LLLIII>>>''':::VVVyyy˻ccc===333JJJqqqxxxeee@@@222111444999;;;888333...,,,///EEEccc}}}gggfffdddcccccc___dddsssdddRRRRRRYYY\\\^^^aaaaaabbbdddhhhjjjlllooopppiiiġooouuuuuuyyywwwxxxsss```PPPBBB???777111888>>>CCCBBB:::CCCGGGIII999+++000AAA\\\rrr|||ƳZZZ???<<<]]]|||eee???...///333777;;;999666///,,,--->>>\\\tttkkkiiigggcccccc___fffuuubbbRRRQQQYYY\\\\\\^^^___aaabbbeeehhhkkknnnllleeeϪvvvxxxvvvwwwxxx{{{{{{mmmbbbSSSLLL@@@;;;AAACCCBBB===;;;FFFGGGFFF777///+++333BBBVVVbbbwww©[[[KKKPPPggggggAAA+++---000555:::999888000///,,,777OOOhhhyyyllljjjhhhdddbbb^^^ggguuu```QQQOOOWWW[[[]]]^^^^^^aaabbbdddgggjjjnnnkkkddḓzzz{{{yyyyyyxxxyyywwwsssrrriii\\\JJJCCCHHHGGGAAA888===CCCGGG@@@999444666666@@@FFF[[[lllĺsssZZZUUUdddpppsssLLL++++++...444999999777000///---444FFFaaauuujjjkkkkkkgggdddaaajjjxxxaaaSSSQQQZZZ\\\___```___aaaaaabbbeeehhhkkkeeennnũ}}}{{{yyyzzzxxxxxxssstttyyyssshhhWWWOOOOOOJJJBBB666AAAAAAHHH<<<===999::::::EEEDDDZZZbbb{{{eee[[[^^^rrrvvvnnnFFF555---222555666555222000...222>>>XXXqqqiiikkkkkkiiieeecccjjj{{{cccRRRPPPXXX\\\```aaa___```___aaaeeehhhhhhcccyyyyyyyyy{{{{{{zzzwwwrrrqqqxxxzzzrrreeeUUUQQQHHHCCC:::BBB@@@BBB;;;CCCDDDBBB===OOOLLLbbbYYYuuu{{{[[[VVVaaammmvvvvvvUUU<<>>777666CCCKKKDDD:::LLLNNNYYYEEEbbbqqqggg___bbbmmmwww]]]===---///333444222///000333@@@\\\wwwjjjhhhhhhgggfffhhhooo}}}cccQQQRRRZZZ^^^]]]]]]___```______cccccc^^^vvvzzz}}}}}}{{{xxxxxxvvvwwwzzzxxxzzzvvvooo\\\LLLCCCBBB>>>666333BBBQQQQQQCCCKKKOOOWWW???TTTXXX{{{vvviiihhhwwwYYY999---111333222......///666QQQppp
kkkhhhgggffffffgggooo~~~cccRRRSSS[[[^^^^^^^^^```aaa```___``````^^^ŵ{{{zzz{{{{{{zzz{{{zzzyyy~~~ppp```WWWMMMFFF>>>:::???MMMVVVTTTPPPRRRQQQGGGIIIHHHXXXddd~~~XXX<<>>...000@@@^^^qqqgggfffqqq^^^DDD333///111444555333HHHhhhccccccbbbbbbdddjjjuuu___NNNQQQYYY^^^aaa```bbbaaaddddddbbb[[[IJ|||yyyppptttuuu{{{~~~~~~}}}zzznnnaaaSSSZZZkkk~~~wwwmmm^^^YYYNNNDDD999//////>>>cccĹnnnaaadddwwwTTT111///111222222///???^^^bbbbbb```aaabbbhhhsss}}}___NNNPPPXXX]]]`````````___bbbcccbbb[[[˿|||rrruuuvvvwww|||~~~yyyqqqddd\\\iiillldddkkkiii^^^UUURRRPPPCCC\\\zzzȽzzzfffeeezzzxxxCCC---000000000...777RRRvvv````````````bbbgggrrraaaPPPPPPYYY^^^``````aaa```aaabbb___ZZZ~~~¸{{{sssyyy|||{{{{{{pppiiirrr}}}|||yyyyyyxxxhhhooo¿ɹoooqqq}}}PPP---///000//////444GGGhhhaaa______^^^```dddooo|||```OOOOOOYYY___aaaaaaaaa```___aaa```ZZZ}}}yyysssxxxʼ|||nnn>>>///222000000111>>>ZZZ|||```^^^___^^^___bbblll{{{___OOOPPP[[[aaabbbcccbbbccc```bbbaaa[[[yyynnnnnnwwwƳ~~~RRR...222000000///777KKKmmmbbb______^^^___```jjj~~~{{{```RRRSSS^^^aaaccccccaaabbbaaaeeeccc^^^ttthhhgggooo~~~˾ccc000333222000...333AAA___|||___^^^`````````___ggg{{{{{{aaaTTTTTT]]]aaadddcccbbbbbbcccfffeeebbbqqqddddddmmmwwwźrrr???111222///...///666IIIiiiaaa```aaaaaaaaabbbjjj|||{{{bbbTTTTTT[[[```bbbaaa___```bbbddddddaaa~~~rrrfffccchhhqqqyyy~~~SSS000111/////////333:::WWWtttbbbcccdddcccbbbccckkk|||yyy```RRRTTTZZZ___```aaa___```aaaccceeeccctttnnniiihhhlllrrryyyzzzɽΙbbb---...///......111111EEEbbbccccccccccccbbbdddlll}}}zzzaaaRRRTTTYYY]]]^^^```___```aaadddfffeee~~~tttjjjhhhhhhhhhkkkrrrxxxyyy|||֩ooo777///000000...000///:::SSSvvvaaabbbbbbdddbbbdddkkk}}}{{{cccUUUUUUYYY]]]```bbbbbbbbbbbbgggjjjjjjuuupppeeeccceeekkknnnrrrxxxyyy{{{}}}׹JJJ222333000.........444BBBddd```aaaaaacccbbbdddjjj{{{{{{eeeVVVVVVYYY^^^aaaddddddcccdddhhhkkklllkkk~~~kkkbbbcccggglllnnnqqquuuxxxxxxxxx|||З___222444222111...---111666LLLlll___```bbbdddbbbccckkk}}}zzzcccVVVWWW[[[```dddfffeeedddeeehhhlllnnnjjjyyyiiiaaaeeeiiipppssssssuuuwww|||yyyzzz|||±ؠhhh111555555222---...111111;;;YYY}}}______aaabbbaaa```jjj{{{xxxaaaVVVVVV[[
[```dddffffffeeeeeeggglllooolllyyygggcccgggjjjooowwwvvvwwwwww~~~|||xxx~~~ƿĵڢiii000666666333......333333111FFFjjj^^^^^^^^^___^^^]]]iii|||xxx___RRRRRRZZZaaadddeeeeeedddeeefffkkknnnlll~~~hhheeeiiikkkooovvvvvvvvvvvv|||{{{zzzų~~~ooolll}}}ڡggg///777777222------222444333===XXX|||]]]\\\\\\]]]\\\[[[gggyyyxxx___PPPPPPWWW^^^bbbcccddddddfffggglllnnnmmmmmmiiijjjlllnnntttuuuxxxyyyzzz}}}~~~~~~϶eeeNNNLLLiii٠eee...444555111///...000333444555DDDfff]]]\\\[[[[[[ZZZYYYfffyyyxxx^^^OOOOOOWWW]]]___aaabbbcccfffhhhlllllllllqqqkkkhhhlllnnnooorrrrrrvvvyyy{{{~~~Ĝ|||wwwlllJJJ333;;;^^^֙\\\+++000333333222000222444555555777PPPqqq\\\[[[ZZZZZZXXXXXXeeeyyyxxx```PPPPPPVVV\\\___bbbccccccgggjjjmmmmmmmmmlllooohhhkkkmmmlllnnnmmmsssyyy~~~{{{׳gggvvvsssRRR888)))666ZZZŇKKK***///333555777555555333444444222???YYY|||\\\[[[ZZZZZZVVVVVVddd{{{xxx```PPPQQQWWW]]]aaabbbbbbaaaeeejjjmmmooooooooottthhhkkklllkkkmmmnnnqqqvvv||||||{{{Λppp^^^yyyvvvUUU;;;+++)));;;eee­xxxCCC)))---333555888666777555555555333555CCCccc~~~ZZZZZZZZZZZZVVVXXXeee|||vvv___PPPRRRWWW___bbbdddbbb```dddjjjmmmpppqqqrrryyyjjjmmmnnnmmmmmmlllnnnsssxxx}}}ڻaaaaaazzzwwwWWW;;;,,,,,,333TTT{{{wwwKKK111+++000333777666666333333333222000444KKKjjjYYYZZZZZZYYYVVVXXXfff|||www^^^QQQRRRZZZ```ccccccbbbaaaccchhhlllooorrrsssrrrnnnooooooqqqqqqpppnnnqqquuu͛lllVVVfff{{{tttTTT999+++111DDD```xxxbbb>>>***---222444555444444222111111///000:::VVVxxxXXXXXXXXXYYYXXXYYYggg|||vvv___TTTTTT]]]bbbcccbbb```bbbeeegggjjjlllrrrtttuuu~~~rrrqqqqqqtttsssqqqooorrrwww}}}̬wwwYYYVVVjjj}}}rrrRRR666,,,:::VVVtttyyyTTT444)))...333888555333000000111111222444EEEbbb~~~___[[[XXXWWWVVVVVVfff|||vvv___TTTUUU```ffffffdddcccccceeefffjjjlllsssuuuwwwwwwzzzyyytttpppooosssttttttrrrppprrruuuzzz{{{Ȭ^^^OOOZZZmmmqqqOOO777333IIIgggkkkFFF///---222888777555111000111000111222;;;PPPkkkjjj```XXXWWWWWWXXXfff|||vvv```TTTVVV___ffffffdddcccccceeegggkkkmmmrrrtttxxxxxxzzzxxxtttooonnnpppqqqsssuuutttsssrrrvvvvvvyyyŨ{{{aaaSSSVVV^^^ooorrrOOO:::>>>ZZZuuu___@@@333666777777666111000000000///000444AAAUUUppp|||lll\\\VVVUUUXXXeee|||uuu^^^QQQSSS\
\\ccceeeffffffeeeeeeggglllooorrrtttwwwxxxyyyxxxtttqqqnnnooonnnqqqrrrppppppooottttttxxx~~~Ҳ^^^WWWZZZ]]]cccqqqoooOOO@@@KKKjjjuuuUUU:::333222333666333//////...//////444===JJJ]]]uuuxxxcccXXXUUUZZZfff{{{uuu]]]PPPRRRZZZ```bbbeeeeeeeeeeeehhhkkknnnrrrtttwwwyyyyyyyyyvvvuuuqqqooommmppprrrtttuuuuuuvvvwwwxxxėkkkWWW\\\^^^___dddpppnnnQQQIIIYYYwwwjjjIII444...///444444000......///...111777@@@MMMbbb{{{nnn^^^UUUZZZggg|||vvv]]]MMMOOOXXX___bbbddddddddddddhhhkkknnnqqqvvvxxx{{{zzz{{{wwwyyyvvvsssnnnmmmooorrrvvvyyyyyyyyyyyy~~~ʧ|||ccc___aaabbbbbbiiisssoooYYYZZZkkk|||ZZZ999+++...111333000///...000///111444;;;DDDOOOcccyyy~~~hhhXXXWWWeee{{{www]]]MMMOOOZZZ```bbbbbbbbbcccfffiiilllmmmooouuuwww|||yyyyyyvvvxxx|||zzzuuunnnnnnrrrwwwzzzzzz{{{|||̰lllbbbddddddeeedddiiisssttthhhnnn~~~sssOOO555---...000000111/////////111000555===@@@HHH___{{{ttt^^^XXXeee}}}yyy]]]OOOPPP[[[aaacccaaaaaaaaafffhhhlllnnnpppuuuxxx{{{yyyyyywwwxxx|||xxxmmmoooqqqvvvxxx{{{yyy|||~~~ǿrrreeehhhhhhiiihhhgggiiittt|||yyyjjjGGG000,,,---...111000......111222333999888888CCCUUUgggYYYbbb{{{www\\\OOOQQQ\\\aaaaaa___```aaafffhhhlllnnnqqqvvvwwwyyyyyyzzzzzzxxx~~~|||rrrssstttwwwxxx{{{{{{½ssskkkmmmmmmmmmmmmkkkggghhhssslllGGG000+++...000000...///111333222333222111333555uuubbbdddzzzxxx]]]OOOPPPZZZ^^^``````aaaaaadddfffkkkooorrruuuvvvyyyzzz||||||}}}~~~|||wwwuuuuuuvvvwwwxxx{{{zzz}}}~~~º~~~nnnjjjnnnrrrrrrqqqpppkkkfffhhhsssqqqGGG---+++---//////000000222000000000000...(((lllhhhyyyyyy^^^OOOPPPXXX\\\___```aaabbbdddgggmmmrrruuuuuuvvvwww{{{|||~~~~~~~~~}}}yyy~~~|||zzz{{{~~~}}}sssoooqqqrrrtttrrrrrrppplllhhhiiitttħmmm@@@******...111333111000//////...000///...yyysss||||||bbbSSSRRRYYY]]]aaabbbaaabbbdddjjjnnnrrruuutttwwwyyy{{{{{{|||~~~~~~|||{{{~~~|||}}}}}}ttttttttttttuuutttssspppnnnkkkjjjjjjtttʎRRR***+++///222444333222111222000000///111}}}zzzbbbVVVWWW]]]aaaddddddccccccffflllqqqtttuuuuuuwwwzzz{{{{{{{{{}}}~~~|||}}}~~~|||zzzxxxxxxwwwxxxwwwuuusssqqqpppnnnmmmmmmvvvҝbbb222,,,...000444333333111333222222///111yyydddYYY[[[```ccceeeeeedddfffhhhmmmoooqqqrrruuuzzz|||{{{zzz{{{~~~~~
~~~~}}}|||~~~}}}}}}}}}|||zzzzzzyyywwwuuusssqqqooommmooovvvgggDDD333......111222444222333111///...///xxxdddZZZ]]]bbbeeeeeeffffffggghhhlllnnnpppssswww{{{{{{}}}||||||}}}~~~}}}~~~~~~}}}||||||}}}zzz~~~~~~}}}}}}zzz{{{zzzyyyxxxwwwuuurrrnnnpppvvvɧlllZZZBBB222+++...000333333444222///...///yyyeee[[[]]]aaaeeeggggggfffhhhiiikkklllnnntttwww{{{{{{}}}|||}}}}}}}}}}}}}}}||||||}}}|||{{{}}}}}}~~~{{{|||{{{zzzxxxvvvssspppmmmpppxxxָtttppplllYYYBBB111---111555666555222///...000yyydddZZZ\\\aaaeeeggggggfffffffffiiilllpppuuuxxxyyyzzz}}}}}}~~~}}}|||}}}~~~}}}{{{~~~~~~~~~}}}|||~~~~~~~~~{{{{{{}}}~~~{{{{{{|||~~~{{{zzzzzzyyyxxxvvvsssooonnnqqqzzzɢ}}}jjjqqqvvvooo[[[@@@222///555777999666111///...xxxdddZZZ\\\aaadddggggggfffffffffhhhlllrrrwwwyyyyyyzzz}}}~~~~~~|||~~~~~~~~~~~~~~~}}}~~~}}}}}}||||||zzzxxxzzzyyyxxxvvvxxx{{{}}}~~~}}}{{{|||~~~}}}{{{{{{zzzyyyxxxvvvsssooonnnrrr}}}Էtttnnnsss{{{|||ttt[[[BBB333222555666444111111000yyyeee[[[]]]cccfffhhhhhhgggeeedddhhhlllsssvvvzzzzzz|||~~~~~~~~~~~~~~~}}}|||zzzyyywwwzzzzzzzzzyyyzzz}}}~~~|||}}}~~~}}}|||zzzzzzxxxxxxvvvtttpppnnnsss|||Ý}}}ooovvvxxx|||}}}}}}qqqZZZ@@@444222555333333222111yyyfff]]]___cccggghhhhhhggggggfffjjjlllrrrvvvzzz|||{{{}}}}}}~~~~~~~~~|||zzzxxxxxxzzz{{{{{{zzz{{{~~~~~~~~~~~~}}}}}}|||zzzzzzxxxxxxvvvrrrpppttt~~~ϰqqqooowwwzzzzzzyyy}}}}}}pppUUU===///...000111222222{{{fff^^^```eeehhhiiihhhgggggggggjjjmmmqqqtttyyy||||||~~~|||}}}zzzzzzyyy{{{{{{|||||||||~~~|||{{{zzz{{{}}}}}}}}}zzz{{{zzzyyyuuuqqqpppsss}}}վxxxnnnrrrwwwyyyxxxvvvwww}}}|||lllSSS;;;111...000111333zzzggg^^^___dddfffgggffffffggghhhkkkooosssuuuxxx||||||}}}{{{~~~~~~}}}~~~}}}~~~}}}}}}|||}}}~~~{{{zzz|||}}}}}}~~~}}}xxxpppooosss~~~˪sssrrrsssuuuwwwwwwvvvyyy|||yyykkkQQQ<<<111000222333{{{iii___```eeehhhfffffffffggggggiiinnnsssuuuxxxzzz|||||||||~~~~~~~~~}}}{{{|||}}}}}}yyyuuunnnlllrrr~~~Ӻzzzrrrtttuuuvvvvvvuuuvvvyyy|||~~~|||lllSSS>>>333111111{{{kkk___```dddggghhhggggggggghhhkkkppptttuuuxxxyyy{{{{{{}}}|||}}}~~~{{{wwwssslllllluuuǥrrrrrrssstttuuuvvvvvvwwwzzz|||}}}~~~yyyiiiRRR===222---zzzggg]]]^^^dddgggiiihhhhhhgggggg
kkkoootttvvvyyyzzz||||||~~~}}}~~~yyyuuuqqqoookkkooozzzҷtttooopppsssuuuwwwwwwwwwzzz{{{||||||}}}{{{|||vvvgggOOO;;;000zzzfff^^^___dddfffiiiiiijjjhhhiiiooossswwwwwwzzz|||~~~~~~~~~|||~~~|||}}}{{{xxxuuupppmmmkkkrrràmmmnnnqqqtttvvvvvvvvvxxx{{{~~~}}}~~~{{{{{{yyyyyyqqqfffYYYQQQ|||fff]]]___ffffffhhhhhhjjjiiiiiilllqqquuuwww{{{~~~~~~~~~}}}}}}}}}yyyvvvtttqqqmmmlllvvvҵppplllqqqtttwwwyyyxxxwwwwwwzzz|||zzz|||{{{{{{xxxwwwvvvvvvxxxzzz~~~jjj```ccchhhgggggghhhllljjjjjjlllpppsssvvvyyy~~~~~~~~~~~~~~~|||yyywwwtttqqqlllmmmwwwЪgggllltttyyyzzzzzzwwwwwwxxxyyyyyywww{{{yyyzzzxxxvvvvvv|||Release_v0.3/kernels/compiler_byte_scatter.cl000066400000000000000000000001671223142177000215540ustar00rootroot00000000000000__kernel void compiler_byte_scatter(__global char *dst) { int id = (int) get_global_id(0); dst[id] = (char) id; } Release_v0.3/kernels/compiler_ceil.cl000066400000000000000000000001711223142177000177730ustar00rootroot00000000000000kernel void compiler_ceil(global float *src, global float *dst) { int i = get_global_id(0); dst[i] = ceil(src[i]); } Release_v0.3/kernels/compiler_chocolux.cl000066400000000000000000000024761223142177000207150ustar00rootroot00000000000000typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod #define time 10.f inline vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } inline uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) __kernel void compiler_chocolux(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); vec3 s[4]; s[0]=(vec3)(0); s[3]=(vec3)(sin(time),cos(time),0); s[1]=s[3].zxy; 
s[2]=s[3].zzx; float t,b,c,h=0.0f; vec3 m,n; vec3 p=(vec3)(.2f); vec3 d=normalize(.001f*(vec3)(gl_FragCoord,.0f)-p); for(int i=0;i<4;i++) { t=2.0f; for(int i=0;i<4;i++) { b=dot(d,n=s[i]-p); c=b*b+.2f-dot(n,n); if(b-c0.0f) { m=s[i];t=b-c; } } p+=t*d; d=reflect(d,n=normalize(p-m)); h+=pow(n.x*n.x,44.f)+n.x*n.x*.2f; } vec4 gl_FragColor=(vec4)(h,h*h,h*h*h*h,1.f); OUTPUT; } Release_v0.3/kernels/compiler_chocolux_ref.bmp000066400000000000000000006000661223142177000217300ustar00rootroot00000000000000BM66(+++LOQTW Z"]$`&c(f+i-k 0n 2q 5t 7w :y<|?BFKR#_4tWÞwQ;{.l&c"] [ Z!\#_'d,j4s>~L^wҖ߿cI6u(eWKB :z 4s/n+i(e%b#_"]![ZXWVUTS 1p 2q 3s 4t@CGIKLKJHD@;{ 6v 1p,j'd#_ZUQMIFCA?=<; ;;<<=>?ACFILPU Z#_'d+i/n 3r 6v 9x ;{<|=|<|;{"^"]"]!\YYY Z Z [!\!\"^#_%a'd*h.m 4s ;{ER&b7wQvҩyV=|+iXK@ 7v/n*g%a!\XURQPOOOPQRTUWY Z**HJMPSVY!\#_%a'd*g,j.m 1p 3s 6u 8x ;{>}@DHNX+iDw[A1p(e#_![ Z ["^%b*h1p:zGWn΋۱lP;{+i [OE=| 6u 1p-k)g'c$a#^!\ [ZXWVUT 2q 3r 4s 5tADHJLMMLIFA<| 7w 2q-k(e$` ZVQMJFDA?=<< ;;<<=>?ACEHLPUZ#_'d+i/n 3r 6v 9y ;{<|=}=|<{#^"]"]!\YYZ Z [ [!\!]"^#_%a'd*h.m 4s;{ER&c8wRwӫwT<{*hWJ? 6v/m)g%a!\XURPOOOOPQRTUWY [)CFILOQTW Z"]$`&c)f+i-l 0n 2q 5t 7w :y<|?BFKS#_4tWÛeG5t*h$`!\ Z Z!\$`(e.l6vAPd֡tV@/m#^RG? 8x 3r.l+h(e%b$`"^!\ [YXWVU 2q 3s 5t 6uAEILNOOMKGC>} 8x 3r.l)f$` [VRNJGDA?=<<;;<<=>?ACEHLPTY#^'d+i/n 3r 6v 9y ;{<|=}=}<|#^"]"]XYYZ Z [ [!\"]"^#_%a'd*h.m 4s<{ER&c8xRxӬtS:z)gWI>~ 6u.m)f$`![WTRPOOOOPQSTVWY [?BEGJMPSVY!\#^%a'd*g,j.m 1p 3s 6u 8x ;{>}@DHNW)g?kͽnL8x,j%a!\ZY ["^&c+i2q<|I[rБݸz[D2q%bUJA :z 4s 0n,j)f'c%a#_"]!\ ZYXWV 3r 4s 5t 6vBFJMOPPOLHD? 
9y 4s.m)g%a!\WRNJGDB?><<;;<==>?ACEHKOTY#^'c+h/m 3r 6v 9y;{=|=}=}<|#^"]"]XYYZ Z [![!\"]"^$_%a'd*h.m 4s<{FS&c8xSyӭrQ9y(eUH>} 5t.l(f$` [WTRPOOOOPQSTVXY [>ACFILNQTW Z"]$`&c(f+i-k 0n 2q 5t 7w :y<|?BEJQ!\/nJ{tQ;z.l&b!]ZYZ!\$`(f/m7wCRfˁפ_G5t(eWLC<{ 6u 1p-k*h(e&b$`#^"]![ ZYXW 3s 4t 6u 7vCGKNQRRPNJE@ :z 5t/n*g%b!\WSOKHEB@><<;<<=>>@ACEHKOSX"^&c+h/m 3r 6v 9y;{=}>}=}<|#^"]"]YYY Z Z [![!\"]#^$`%b'd*h/m 4s<{FS'c9xSyӭoO8w'dTG=} 4t-l(e#_ [WTRPOOOOPRSUVXZ!\=?BDGJMPSVX![#^%a'd*g,j.m 1p 3r 6u 8x ;z=}@CGLT%a4tQxS=|/m'c"]YXX Z"]&b+i2q<|I[rАݶ悶bJ7w)gYNE=} 7w 2q.m+i)f'c%a$_"^!\ [ ZYX 4s 5t 6u 7vDHLPRSSROKFA ;{ 5u 0n*h&b"]XSOKHEB@>=<;<<=>?@ACEGKOSX"]&c*h/m 3r 6v 9y<{=}>~>}=|#^"]"]YYZ Z [ [!\!\"]#^$`%b(e+h/m 4t<|FS'd9xSyӭlL6u&cSF<| 4s-k'd#_ ZVSQPOOOPPRSUWX Z!\ ;>@CFHKNQTWZ"]$`&b(e+h-k/n 2q 4t 7v 9y<|?~AEIOW'd8wTxT>}0n'd"]YWWX [#_(e.l6vAPd}՞惷dL9y+i![PG? 9x 4s 0n,j*g(e&b$a#_"]!\ [ZY 4t 5u 6v 7wEIMQSUUSQMHB<| 6v 0o+i&c"]XTOLHEB@>=<;<<=>?@ACEGJNSX"]&b*h.m 3r 6v 9y<{=}>~>~=}#^"]XYYZ Z [ [!\!\"]#^$`%b(e+h/m 4t<|FS'd9yTyԭiJ4t%aRE ;{ 3r,j'd#^ZVSQPOOOPQRSUWY Z!\ :<?ADGJMORUX ["^%a'd)g,j.l 0o 3r 5u 8w :z=}@BFJPY)f9xRvңݖ߭tS=}0n'd"]XVUVX!\%a*g1p:zGWlΈڪ傶dL:z,j"]QH@ :z 5t 1p-l+i)f'd%b$`#_"]!\ [Z 5t 6u 7v 8wFJORUVVURNIC=} 7v 1p+i'c"^YTPLIFC@>=<;<<=>?@ACEGJNRW"]&b*h.m 2q 6v 9y<|=}>~>~=}#^"]XYY Z Z [![!\!]"]#^$`&b(e+i/m 5t<|FS'd9yTyԭeH3r$`QD :z 2q,j&c"^YVSQPOOOPQRTUWY [!] 
8 ;=@CEHKNQSVY!\#_&b(e*h-k/n 1p 4s 6v 9x;{>~ADGKR Z)f7vJd֖ߢރvptу؟mP<|/n'c!\XUTUVY"]&c,j4s>~L]tѐݳcL:z-k"^SIB;{ 6v 2q/m,j*g(e&c%a$`#^"]!\ [ 5t 6u 7v 8wFKPSVXXVSOJD>~ 8w 1p,j'd#^YUPMIFC@>=<;<<=>?@ACEGJNRW!\%b*g.m 2q 6v 9y<|>}>~>~>}#^"]XYZ Z Z [![!\"]"^#_$`&b(e+i/n 5t<|FS'd9yTyӭ쉻bE1p#^OC 9y 1p+i&c"]YUSQOOOOPQRTVWY ["] 7 9<>A 3 4 6 7 9;=@BD$`&c)f+i-l 0o 2q 5t 7w :y<|?ADHLR Z(e2rAP_ilib[XZdvґݶ腸dK:y.l&b![WTSSTVZ#^'d.l6vAPbyԖ߸z`K:y-k#^SJC<| 7w 3r 0n-k+h)f'd&b%a$_#^"]!\ 5u 6v 7w 8xGLQTWYYXUPKE?~ 8x 2q,j'd#_ZUQMJGDA>=<;<<=>?@BCEGJMRW!\%b*g.l 2q 6v :y<|>~?~?~>~#^"]YYZ Z [ [!\!\"]"^#_$`&b(e+i/n 5t<|FS'd9ySyӬꄷ^C/n!\NB 9x 1p*h%b!]XURPOOOOPQSTVXZ!["] 5 8 : 0 1 2 3 5 7 9 ;=?BDFIKL,j.m 1p 3r 6u 8x ;z=}@BEIMRY%b-l6v@GKKIFEGNYk̓؟tZF7v,j%a ZVSRQRTV Z$_)f/n8xDSf}ՙ菿t\H9x,j#^TKC=} 8x 4s 1o.l,i*g(e'c%b$`#_"^!] 6u 7v 7w 8xHMRVY [ [YVRLF? 9x 2q-k(e#_ ZVRNJGDA?=<;<<>?@ABCEGJMRV!\%a*g.l 2q 6v :y<|>~??>~#^XYYZ Z [![!\!\"]"^#_$`&b(e+i/n 5t<|FS'd9ySxӫZ@-l [MA 8w 0o*g%a!\XTRPOOOOPQSUVX Z!\"^ 4// 0 0 1 3 4 6 8 :=?BDGIKMOQ/n 2q 4s 6v 9x;{>~@CFIMRW#^(e.l3r6v8w8x8w8w9y>~FQasчٚ㎾xbO?3r*g#_YURPPPQSV [$`*g1p:zFUg}՘ℸlWE7v+i"^TKD>~ 9y 5t 2p/m,k+h)f(e&c%b$`#_"^ 6u 7v 7w 8xHNSW Z"]"] [WSMG@ 9y 3r-k(e$_ ZVROKHDA?=<;<=>?@ABCEGJMQV!\%a)g.l 2q 6v :y<|>~??>~#^XYY Z Z [![!\!\"]#^#_$a&c(e+i/n 5t=|FS'd9xSxөzW=}+iYK@ 7v/n)g$a![WTRPOOOOPRSUWX Z!\#^//// 0 1 2 3 5 7 :<?BEGJLNPRSU 2q 5t 7w :y<|>~ACFIMQU Z$_'d*g,j-k-k.l0n3r8w?IVcpzԀր{pbSE9x/m'd"]XTQONNOQSV [%a*h1p;zGUg{ԓޭ叿ydQB5t*h"]SKD?~ :y 6u 2q 0n-l,i*h)f'd&c%a$`#_ 6u 7v 7w 8xIOTX!\#_#_!\XTNHA :y 3r-l(e$` [WSOLHEB?=<;<=>?@ABCEGIMQV![%a)g.l 2q 6v :y=|>~???~"^XYZ Z [ [!\!\"]"]#^#_%a&c(e+i /n 5t=|GS'd9xRwҨtS;{*gWJ?~ 6u.m)f$` [WTQPOOOOPRSUWY [!]#^///// 0 1 2 4 6 9<?BEHKMPRTUVWX 5u 8w :z=|?ADGILPSW ["^$`%a&c'd)f+i.m3s:zBKS[_a^XOE;{3r+i%a [VRPNMMMNPSV [%a*h2p;zFTdvҋ۠l[K=}2q(f!\SKE? 
:z 7v 3r 1o.m,k+i)g(e'd&b%a$` 6v 7v 7w 8wJOUY"^&b%a"^ZUOHA :z 4s.l)f$`![WTPMIFB?=<;<=>?ABCDEGIMQV [%a)g.l 2q 6v :z=|?~@@?=}YYZ Z [![!\!\"]"]#^$_%a&c(f+i 0n 5t=|GS'd8xRvҦoO8x(eUH=} 5t.l(e#_ ZVSQPOOOPQRTUWY ["]#_//.../ 0 1 3 6 8 ;>BEILOQTVWXYZ Z 6u 8x ;z=}?BDGILORTWY [!\"^$_%b(e+i0n5t:z@EIJIFA:z3s-k'd"^YTQNMKKKLMORV [$`*h1p:yDQ_o֐ݡފ}o`RE9y/n'c ZRKE? ;{ 7w 4s 2p/n-l,j*h)f(e'd&b%a 6v 7v 7w 8wJPU Z$`)f'd#_ [VPIB ;z 4s.l)f%a!\XTQMJFC@=< ;<=>@ABCDEGIMQV [$a)f.l 2q 6v :z=}?@@?>}YYZ Z [![!\!\"]"^#^$`%a&c)f,i 0n 5u=|GS'd8xQtѣiK6u&cSG<| 4s-k'd#^ZVSQOOOOPQRTVXZ!["]#_ 0/..../ 1 2 5 7 ;>BEIMPSVXY Z![!\!\!\!\ 9x ;{>}@BDGIKNPRTVWY [!\#_&b(f,j/n3r7v9y;z:z9x5u1p-k(e$` [VSPMKJIIJJLNQUZ$_)f0n8wALXer֊۔ޚဵր|uk`UI>~4t,j%aXQJE@;{ 8w 5t 2q 0o.m-k+i*h)f(e'c%b 6v 7v 7w 8wKQV!\&c-k+h%a!\WQJB ;{ 4s.m)f%a!\YURNKGC@=< ;<=>@ACCDEGIMQU [$`)f.l 2q 7v :z=}?@@?>}YY Z [ [!\!\!]"]"^#_$`%a&c)f,j 0n 5u=|GS'c8wPsѡ싼dH3r$`QE ;z 3r,j'c"^YUSQOOOOPQSTVX Z!\"^$_ 0/..... 0 1 4 6 :=AFJNQUX Z!\"]"^#^#^#^"^"] 9y<{>~@BDGIKMOQRTUWX Z!]#_&b(e+h-k/m0n0o/m-k*h'd$`!\XTQNLJIHHHHIKMPTX#^'d-l5t=|FPZdmtzdghfb[SJA8x0o)f"^VPJD@<| 9x 6u 3r 1p/n.l,j+i*g)f'd&c 6v 7v 7w 7wLRW"])g3r/m&c"]XRKC;{ 4t.m)g%a"]YVSOLHD@=< ;<=?@BCDEFGILPU [$`)f.l 2q 7v :z=}?@@@>~YZ Z [ [!\!\"]"]#^#_$`%a'c)f,j 0o 5u=}GS&c7wPqО郷_D0o"^OC :y 2p+i&b"]XURPONOOPQSUVX Z!\#^$` 1 0/.--./ 0 3 5 9=AEJNSVZ!\#^$`$a%a%a$a$`$_#_ :y<|>~@BDFHJLNOQRTUWX Z!\#_%a&c'd(e(f(e'c%a#^ [XUROMKIHGFFFGHILORW!\%b+h1p8w?GNU[`OSTTQMG@9y2q,j&b [TNID@<| 9y 6v 4s 2q 0o/m-l,j+i*g(f'd 6v 7v 7v 7vLRX#^-l<{5t(e#^YSKC<{ 5t/m)g%b"^ ZWTPMHDA>< ;<=?ABCDEFGILPU Z$`)f.l 2q 7v :z=}@AA@>~YZ Z [![!\!\"]"^#^#_$`%b'd)f,j 0o 5u=}GS&c7vOoϛ|Z@.l![MB 8x 1o*h%b!\XTRPONOOPRSUWY ["]#^$` 2 0/.---./ 1 4 8<@EJOTX!\#_%a&c'c'd'd&c&b%a$`$_ :y<|>~@BDFHJKMNPQRTUVXZ [!]"^#^#_#^"]!\ZWURPMKJHGFEDDDEFHJMPTY#_'d,k2q8w=}CG:y?~BDDC@<|7w2q-k(e#^XRMHD@=| :y 7w 5t 3r 1p 0n.m-k,j*h)g(e 6v 6v 7v 7vMSY$`3rI<|*g$` [TLD<| 5t/m*g&b#^ [XUQNIEA>< ;<=?ACDEFFHJLPU Z$`)f.l 2q 7v ;z>}@AA@>~Y Z Z [!\!\"]"]"^#^#_$`%b'd)f,j 0o 
6u=}FS&b6vMmΘuT=|+iYK@ 7v /n)g%a![WTQPONOOPRSUWY ["]#_$a 3 1 0/.---. 0 3 6 :?DJOUY"]%a'd(e)f)g)g)f(e'd&b%a$` :z<|>~@BDFHIKLNOPQRSUVWXYYYYXWVTRPNLJHGFDDCBBCCDFHKNRV [$`(e,j0o4t8w.l2q5t7w8w7w6u3r0n,j(e$` ZUPKGC@=} :z 8w 6u 4s 2q 1p/n.l-k+i*h)f 6u 6v 6v 6vMTZ&b<{^G,j%a!\VMD<| 5t/m*g&c#_!\YVSOJFA>< ;;=?ACEEFGHJLPU Z$`)f.l 2r 7v ;z>~@AA@?~Y Z [ [!\!\"]"]"^#_$_$`%b'd)g,j 0o 6u=}FS&b6uLk͔nP9y)fWJ?~ 6u.m)f$` [WSQOONOOQRTVXZ!\"]#_%a 4 3 1 0.----/ 1 5 9>CIOU Z#_&c)f*h+i,j,j+i*h)f(e&c%a$` :z<|>~@BDEGIJKMNOPQRSSTUUUUTTRQONLJIGFDCBBAAAAABDFHKORV [#_&c)g [$`(e*h-k.l.l-k,i)g'c$_ [WRNJFC@=} ;z 9x 7v 5t 3r 2q 0o/n.l,k+i*g 6u 6u 6u 6uNT Z(eH}T.l&c#_XOE<| 5t/n*h&c$_!] ZWTPLGB>< ;;=@BDEFFGHJLPU Z$`)f.l 3r 7w ;{>~@AAA?Z Z [![!\!\"]"^#^#_$`%a&b'd)g,j 0o 6u=}FR%b5tKi̐쌽gK6u'cTH=} 4t-l(e#_ ZVSQONNOPQRTVX Z!\"^$`%a 6 4 2 1/.-,-. 0 3 7<BHNU [$`(e*h-k.l.m.l-l,j+i)g(e&c%a$` :z<|>~@BCEFHIJLMNNOPQQRRRRQQPNMLJIGFECBA@@??~?~?@@BDFHKNRUXLQV Z"^$`&b&c&c%b$`"] ZWSPLIFC@=};{ 9y 7w 6u 4t 3r 1p 0o/m-l,j+h 6u 6u 5u 5tNU [*hYĬf0o(f%b [QF=} 5t/n*h'c$`"^![YVRMGB>< ;;=@BDFGGGHJLPU Z$`)f.l 3r 7w ;{>~@BBA?Z Z [!\!\"]"]"^#^#_$`%a&b'd)g,j 0o 6u=}FR%a4sJgˌۺ胷`F3r$`RF<{ 3r,k'd#^YURPONNOPQSTVX Z!\#^$`%b 8 6 4 2 0/--,-/ 1 5 :?FMT Z$`(f,j/m 1o 1p 0o 0n.m-k+i)g'd&b$a#_ :y<{>}?ACDFGHIJKLMNNOOOOOONMLKJIGFEDBA@?>~>}=}=|=|=}=}>~@ACEGJMODIMQUWY [ [ [YXUSPMJHEB@>~<| :z 8x 7v 5u 4s 3r 1p 0o.m-k,j 5u 5t 5t 5tNU!\-ks|3r+i)f$`UH=} 5u/n*h'd%a#_"] ZWSNHC?< ;;=@CEFGHHIJMPU Z$`)f.l 3r 7w;{?~ABBA? Z [ [!\!\"]"]"^#_#_$`%a&c'd*g,k 0o 6u=}FR$a3rHdʈڴ{ZB/n"^PD :z 2q+i&c"]XURPONNOPQSUWY ["]#_$a&b : 8 6 4 2 0.-,,- / 3 7=CJRY$`(f-k 5t=} :y 4s 2q 0o.m-k+h)f'd%a$` 8w :y;{=}?ABDEFGHIJKKLLMMMMLLKJIHGFEDBA@?>~=}<|;{ ;{ ;z ;z ;z;{<|=}>~@BDF 9y>}BFJMPRSTTTSQPMKIGDB@>~=| ;{ 9y 8w 6v 5t 4s 2q 1p /n.l-k 5t 5t 4t 4sOV!]1p6v/m.m)g ZK>~ 5u/n+h'd%b$`#^!\YUOID?< ;;=@CFGHHHIJMPU Z$`)f.l 3r 8w<{?ABBA? Z [![!\!]"]"^#^#_$_$`%a&c(e*g-k 1o 6u=}FR$`2qGaȄحrT=},k [MB 8x 1p*h%b!\XTRPONNOPRSUWY ["]#_%a&c< : 8 6 4 2 0.-,,. 
1 5 :@GOW#^(e.lFHDJ 6u 2q 0o.l,j)g'd&b$`#^ 7w 9y ;{=|>~@ACDEFGHIIJJJKKKJJIIHGFEDCA@?>~=}<| ;{ :z 9y 9y 9x 9x 9x 9y :y ;z<{=}>~@ 5t 8x<|@DGILMOOOONMKIHFDB@?=}<{ :z 9x 7w 6v 5t 3s 2q 1o/n-l 5t 4s 4s 3rOV"]6u:y3s6v3r&cP@ 6u/n+i(e&c%a$`"^ [VQJD?< :;>ADFHIIIIKMPU Z$`)f.l 3r 8w<|?ABBA? Z [!\!\"]"]"^#^#_$`$`%b&c(e*g-k 1o 6u=}FQ$_2pE_֦쎾jN9y*gXK@ 7v 0n)g%a![WTQONNNOPRTUXZ!\"^$`%a&c>< : 8 6 4 1/.,,-/ 2 7=DKS![&b,jKE 4s 1p/m,j*h(e&c$`#^"] 7v 9x :z<|>}?@BCDEFGGHHHIIIHHGGFEDCBA@?>~=}<{ ;z :y 9x 8x 7w 7v 7v 6v 7v 7v 7w 8w 9x :y-l 1p 4s 8w ;{>~BDGHJKKKKJIHGEDBA?>~=|;{ :z 9x 7w 6u 4t 3r 2p 0o.m 4s 4s 3r 3rOV#^<{>}:zCA1oVB 6v/n+i(e'c&b%a$`!]XRKE?< : ;>ADGIIJIJKMPU Z$`)f.m 3r 8x<|?ACCA@ Z [!\!\"]"^#^#_#_$`%a%b&c(e*h-k 1p 6u=|FQ#_1oC\{ԟ焷cI5u'dUI>~ 6u.m(f$` [VSQONNNOQRTVX Z!\#^$`%b'c@?= ; 8 6 3 1/-,,- 0 3 9?GOW#_(f 5ug#_ 6u 1p/m-k*h(e&c$`#^!\ 5t 6v 8x :y;{=}>~@ABCDDEFFFGGGFFFEEDCBA@?>~=}<{ ;z :y 9x 8w 7v 6u 5u 5t 5t 4t 4t 4t 5t 5u(e+h.l 1p 4s 7w :z=}@BDFGHHHHGGFEDBA@?>}<| ;{ :y 8x 7w 6u 4s 3r 1p/n 4s 3r 3r 2qPW#_CBDUYB$`F 7v/n+i)f'd'd&c%a#_ ZTME@< : ;>BEHJJJJJKMPU Z$`)g.m 3s 8x<|@BCCB@ [![!\!]"]"^#^#_$_$`%a&b'c(e*h-k 1p 6u=|EP"^0nAYvҘz\D2q$`RF<| 4s-k(e#_ZVSPONNNOQRTVX [!]#_$`&b'dBA?= ; 8 6 3 1/-,,. 1 5 ;BJR Z%a*h ;z<|VR 5u [ 4t 6u 8w 9y ;z<|=}?~@ABBCDDDEEEEDDCCBAA@?>~=|<{ ;z :y 8x 7w 6v 6u 5t 4s 3r 3r 2q 2q 2q 2q 2q&c)f+i.m 1p 4s 7w :z<|?ACDEFFFFFEDDCBA@?~=}<| ;{ :y 8x 7v 5u 4s 2q 0o 3r 3r 2q 1pPW$`KFPqЀ_/nJ 8w 0n+i)g(e(e(e'c%a!\UNF@< : ;>BFIJKKKKKMQU Z$`)g.m 4s 8x=|@BCCB@ [![!\"]"]"^#_#_$`$`%a&b'd(e*h-k 1p 6u=|EP"].m@VqБqU?.m"]PD ;z 3r,j'c"^YURPONNOPQSUWY ["]#_%a&c'dDCA@= ; 8 6 3 0.-,-. 
2 7=DLT<|=}>~?@AABBCCCCCBBAA@?>~>}=|<{ ;z 9y 8x 7w 6v 5u 4t 3s 3r 2q 1p 1p 0o 0o 0o"^%a'd*g,k/n 2q 5t 7w :y<|>~@BCDEEEEEDDCBBA@?~=}<| ;z 9y 8w 6v 5t 3r 1p 3r 2q 1p 0oPW$`TKaț폾AQ 9y 0n,j*g)g)g)g(f&c"^WOG@< : ;>BFJKLLKKLNQU Z$`)g/m 4s 9x=}@BCCB@ [!\!\"]"^#^#_$_$`$a%a&b'd(f*h-k 1p 6u<|EO!\-l>}SlΊ舺hN:z+i ZMB 9y 1p+i&b!]XTQONNNOPQSUWY!["^$_%a&c(eFEDB@> ; 8 6 3 0.-,-/ 3   !?~?@@AAAAA@@@?>~>~=}<|;{ :z 9y 8x 7w 6v 5u 4s 3r 2q 1p 1p 0o/n/m.m Z"]$`&c(f+i.l 0o 3r 5u 8w :z<|>~@ACDDEEEEDDCBBA@?~=}<| ;z 9y 7w 6u 4s 2q 2q 1p 1o 0nPW%a^Pv\Y ;z 0o,j*h*h*h+i*h(e$`YPG@< : ;?CGKLMMLKLNQU [%a*g/m 4s 9y=}@CDCB@ [!\!]"]"^#^#_$`$`%a%b&c'd)f+h-l 1p 6u<|EO![,j<{Og~`H6u(eWK@ 7w 0o*g%a!\WTQONNNOPRSUXZ!\#^$`%b'c(eHGFDC@>; 8 5 3 0.-  !""###$$$$>~?~?????~>~>~=}=}<|<{ ;z :z 9y 8x 7w 6v 5t 4s 3r 2q 1p 0o 0n/m.l-l-kZ!\#_%b(e*h-k/n 2q 4s 7v 9x ;{=}?@BCDDEEEEEDDCBA@?~=}<{ :z 8x 7v 5t 3r 2p 1p 0o/mPW%bhU&b=} 0o,j+i+i,j,k,j*g%b ZQH@< :;?DHLMNMLLLNQU [%a*g/n 4t 9y>}ACDDB@![!\"]"]#^#_#_$`$`%a%b&c'd)f+h-l 1p 6u<|DN Z+i:yLcsXC2q%aTH>~ 6u/m)f$` [VSPONNNOPRTVX Z!\#^$`&b'd(eJIHFECA>; 8 5 2 !"#$%%&&''''''''=}=}=}=}=|<|<|;{ ;z :z 9y 9x 8w 7v 6u 5t 4s 3r 2q 1p 0o/n.m.l-k,jWY!\#_%a'd*g,j.m 1p 3r 6u 8w :z<|>~@ACDDEEFFEEEDCBA@>~=};{ :y 8w 6u 4s 1p 0o/n.lPX&cqZĭ/m? 
0o,j+i,j-l.m.l,j'd!\RHC< 9;?DIMNONMLMNQV [%a*h /n 5t :y>~ACDDB@!\!\"]"^#^#_$`$`%a%a&b&c'd)f+i.l 1p 6u<|DNY*g8wI^Ƹ爺jQ>}.m"^QF<| 4s-k(e#_ZVRPNNNNOQRTVY ["]#_%a&c'd(fQLJHGECA>; !"#$%&'(()**+++++++*** ;{ ;z :z :z :y 9y 9x 8w 7w 6v 6u 5t 4s 3r 2q 1p 0o/n.m-l-k,jUWY!\#_%a'd)g,j.l 0o 3r 5u 8w :y<|>~@ACDEFFFGGFFEDCBA@>~<| ;z 9x 7v 5t 3r /n.m-lPX&cx_8xA 1p-k,j-k/m 0o 0o.l)f"]SID= 9;@EJNPPONMMNQV [%a*h 0n 5t :z>~ACDDB@!\!]"]"^#_#_$`$`%a%b&b'c(e)f+i.l 1p 6u<|DMX)f6uFYį}aK9x+i ZNC :z 2q,j'c"^YURPNNNNOQSUWY!["]$_%a&c(e)fXOKIHFD!"#$&'()*+,--./// / ////..-, 9x 8x 8x 8w 7v 6v 6u 5t 4s 3s 3r 2q 1p 0o/n.l-k,j,i+hUWZ!\#_%a'd)g,j.l 0o 3r 5t 8w :y<|>~@BCDEFGGHHGGFFEDBA?=}<{ :y 8w 6u 3s/m.l,kPW'c|bAC 1p-k-k.m 0o 2q 2q /n*h#_TNF= 9<@FKOQQPNMMORV![%b+h 0o 6u :z?~BDEDC![!\"]"^#^#_$_$`$a%a%b&c'd(e)g+i.l 1p 6u<{CLW'd3sBTsYE4t(eWKA 8x 1p+h&b!\XTQONMNNPQSUWZ!\"^$`%b'c(e)f$`RLJ !#$&'(*+,./ 0 1 1 2 3 3 4 4 4 4 4 4 3 3 2 1 0/. 6u 5u 5t 4s 4s 3r 2q 1p 0o 0n/m.l-k,j+i+hSVX Z"]#_%b(e*g,j.m 1o 3r 5u 8w :z<|>~@BDEFGHIIIIHHGFECB@>~=| ;z 8x 6v 4s.m-k,jPW'd|cHC 1p-k-l/n 2q 4s 4s 1p,j$`UQH= 9=AGLPRRQONNORV!\&b+i 0o 6u ;{?BDEDC!\!\"]"^#^#_$`$`%a%a&b&c'd(e)g+i.l 1p 6u<{CLV&c1p?~֟冹iR?0o%aTI? 7v /n*g%a![WSQONMNOPRSVX Z!\#^$`&b'd(e)gK*hT "$%'(*+-. 0 1 2 3 4 5 6 7 8 8 9 9 9 9 9 8 8 7 6 5 4 3 2 0 3r 3r 2q 1p 1p 0o/n.m.l-k,j+i*hRTVX ["]$`&b(e*h-k/m 1p 4s 6u 8x ;z=}?ACEFGHIJJJJJIHGFECA?=};{ 9y 7v 5t-l,j+iPW'cxjbJC 1p-l.m 1o 4s 6u 6v 3r-k%aUUJ= 9>BHMQSSRPNNORW!\&b+i 1p 6v;{?CDEDC!\!]"]#^#_$_$`$a%a%b&b&c'd(e*g+i.l 1p 6u;{BKU%a/n<|yӖߵ|`K:z-k"]QF=} 5t.l(f$_ ZVSPNMMNOPRTVX ["]#_%a&c'd)f*g"^!"$&')+-. 
0 2 3 5 6 7 8 : ;;<==>>>>==<< ; 9 8 7 5 4 2 1o 0o/n/m.l-k,k,j+i*h)gRUWY!\#^%a'c)f+i-k 0n 2q 4t 7v 9y<{>~@BDFGIJJKKLKKJJIGFDB@>~<| :z 8w 5u-k,jIPW&cqX^GB 1o.l/m 2q 5u 8w 8x 5t/m&bYYM= 9@EIOSTTRPONPRW!\&c,j 1p 7v<{@CEEEC!\"]"^#^#_$`$`%a%a&b&c'c'd(f*g,j.l 1p 6u ;{BJT#_-k9ysюܪrXE5u)fYND ;z 3r-k'd#^YURPNMMNOPRTWY ["]$_%a&c(e)f*g!#$&(*,./ 1 3 5 6 8 9 ;<>?@AABCCCCCBBA@?>= ; 9 8 6 4.l-l-k,j+i+h*g)fQSUX Z!\#_%b'd)g,j.l 0o 3r 5u 8w :z=|?ACEGIJKLMMMMMLKJHGECA?=} ;z 8x 6u,j+iHPW&cgKX?@ 0o.l 0n 3r 7v :y :z 7v 0o&c"]"]O= :BGJPTVUSQOOPSW"]'c,j 2q 7w<|@CEFEC!\"]"^#_$_$`$a%a%b&b&c'd(e)f*h,j.m 1p 6u ;zAIS"]+i6umΆ٠扻iQ?1p&bVKA 9x 2p+i&c"]XTQONMMNOQSUWY!\"^$`%b'd(e)g*h!"$&(*,. 0 2 4 6 8 9;=?@BCDEFGHHHHHHHGFEDCA?=; 9 7,j+i+i*h)g)fORTVX ["]$`&c(e*h-k/n 2p 4s 7v 9y;{>~@CEGHJKMMNOONNMLKJHFDB@>} ;{ 9x 6v 4s*hHOV%b\@Q4s>} 0n.m 0o 4s 8x<{<| 8x 1p'd%a%aQ= ;EJKRVWVTRPOPSX"]'d,k 2q 8w=|ADEFEC!]"]#^#_$`$`%a%a&b&c'c'd(e)f*h,j.m 1p 5u ;zAHQ![)f3rh֖ߪ䂶aJ:y-k#^SH? 7v 0o*h%a!\WSQOMMMNOQSUX Z!\#^$`&b'd(f)g*h  "$&(*,. 0 2 5 7 9 ;<>@BDEGHIJKLMMNNNMMLKJIGEDA?= ; 8*g)f(eNPRTWY!\#^%a'd)g+i.l 0o 3r 5u 8w :z=}?BDFHJLMNOPPPPOOMLKIGEC@>~<{ 9y 7v 4s*gHOV%aQ|8xH)g ;z/n.m 1p 5u :z=}>~ :y 2q'd(e(eS<<HMMSWXWURPPQTX"^'d-k 3r 8x=}ADFFEC"]"^#^#_$`$a%a%b&b&c'd'd(e)f*h,j.m 1p 5u :z@HPZ'd0ocwӌ۟{ZD5t)g [PF=} 5t.m)f$` [VSPNMMMNPRTVX Z"]#_%a&c(e)f*g+h  "$&(*,. 0 3 5 7 9 ;=?ACEGIJLMOPQRRSSSSSRQPONLJHFCA>; 9'dOQSUX Z"]$_&b(e*h,k/m 1p 4s 7v 9y<{>~ACFHJLMOPQRRRRQPOMLJHFCA?~<| 9y 7v 4s)fGNU$`FY2q@zwY 7w/m/m 2q 6v;{?@;{ 2q'd+h+iT ;>LQNUYZXVSQPQTX#^(e-l 3r 9x>~BDFFEC"]"^#_$`$`%a%b&b&c&c'd(e(e)g*h,j.m 1p 5t :z@GOX%aK]pЃؓޠvS>~1o&cWMC ;z 3s-k(e#_ZURONMMMOPRTVY ["]$_%a'c(e)f*h+i !#%'),. 
0 2 4 7 9 ;=?BDFHJLMOQRTUVWXXXYXXWWUTRQOLJGDA?<OQTVY ["^$a'c)f+i.l 0o 3r 5u 8x ;z=}@BEGILNOQSTUUUTSQPNLKIFDA?<| :y 7v 4t(fGNU#^=}hC.l8w^dzLN 4t.m/m 2q 7w=}AC>~ 3r'd-k 1pU :@QTPV Z [YVSQPQTY#_(f.l 4s 9y>~BEFGEB"]#^#_$`$a%a%b&b&c'd'd(e(f)g+h,j.m 1p 5t :y?FMV#_HXjzԉړޚqM9y-k#_TJA 9x 2q,i&c"]XTQOMMMNOPRUWY!\"^$`&b'd(e)g*h+i !"%')+-/ 2 4 6 8 ;=?ADFHJLNPRTUWYZ [!\"]"]"^"^"^"]!]!\ ZYWUSQNKHEB> ;UWZ!\#_%b(e*h,j/m 1p 4s 7v 9y<|?BDGIKNPRUWYZYXVTSQOMKIGDB?<| :y 7v 4t 2qGMT"]5tM6u*g1pIvҖo0nD 2q.l/m 3r 8x>~BMC 3r'c 0oGT 9DVWQX!\!\ ZWTQQRUY$_)f/m 4t :z?CEGGEB"^#_$_$`%a%b&b&c'c'd'd(e)f*g+i,k/m 1p 5t 9y?~ELT!\DTcrֈڎܑݘlG4s)f [QG>~ 7v 0o*h%b!\WTQNMMMNOQSUWZ!\#^$a&c'd)f*g+h+i  "$&(*,/ 1 3 5 8 :<>ACEGJLNPRTVX Z!\"]#^$`%a%b&b&c&c&c&b%b%a$_"^!\ ZXUROLHEA>X ["]$`&c)f+i.l 0o 3r 5u 8x ;{>~ACFHKMPSW [#_%a%a#_!\YVTRPNLIGEB?<| :y 7v 4s 2pFMS![/m<|.l'd+i8xOYAY<| 0n-k/m 3r 9y?C/nL 2q&b 6uRQ 8H [ ZS Z"]"]![XTRQRU Z$`)g/n 5t ;z@CFGGEB"^#_$`$`%a%b&b&c'd'd(e(e)f*g+i-k/m 1p 5t 9x>~DKRZAO]ju}ՂׇٓޱgA0n&bWND<| 5t.m)f$` [VSPNMMMNOQSVX Z"]#_%a&c(e)f*h+i,i !#%')+- 0 2 4 6 9 ;=@BDFIKMORTVX Z!\"^$`%a&c'd(e)f)g*g*h*h*g)g(f'd&c%a#^!\YVSOLHD@=#^%a'd*g,j/m 1p 4s 7v :y=|?BEHJMPTY$`)g.l/n-k)f$` [WURPNLJGEB?<| :y 7v 4s 1pELSZ*g1p(e$`&c,j6u7w(eJ 6v.m-k/n 4s :y@FV 1p&b 8w 4sM 8M$`"]U!\#_#_!\XURQRV [%a*h 0o 6u;{@DFGGEB#^#_$`%a%b&b&c'c'd'd(e(f)g*h+i-k/m 1p 4t 8x=}CIPW>~JWcmtx֑ݾa<{,j#^TKB :z 3r-k(e#_ZURONMLMNPRTVY ["]$`%b'd(e)g*h+i,j  "$&(*,. 1 3 5 7 :<>@CEGILNPRUWY!["]$`%b'c(e)g+h,j,k-k.l.l.l.l-k,j+i*g(e&c$`"] ZVSOKGC?&c(f+i-l 0o 3r 6u 8x;{>~ADGILPT [(e2q<|CB:y0o(e"]XUSQOLJGEB?<| 9y 7v 4s 1pEKRX&b*g%a"]"]$`'c$`R? 
2q-k,j/n 4s :z@MX 0o%b 1p*gH 9S(e#_W"^$`$`"]YURQSV [%a+h 1o 7v<|ADGHGEB#_$`$`%a%b&c&c'd'd(e(e)f)g*h+i-k/m 1p 4s 8x=|BHN0o;zFR\dkoxӔ[6v(e ZQH@ 8x 1p+i&c"]XTQOMLLMNPRTWY!\"^$`&b'd(f*g+h+i,j !#%')+-/ 1 4 6 8 :<>ACEGILNPSUWZ!\#^$`&c(e)g+i,j.l/m 0o 1p 1p 2q 2q 1p 1p 0o/m-l,i)g'd%a"^ ZVRNIEA)g,j/m 1p 4s 7v :z=}@CFHKOT!],j=|Sgk_J7v*h#^YVSQOLJGDB?~<| 9y 6v 3r 1oDKQW#^%a"] [YXWQF 8x 0n,j,j/m 4s ;zAYP/m$`,iO= 8X 6v$`Y$_%b%a"^YUSRSW!\&b+i 1p 7w=}BEGHGE#^#_$`%a%b&b&c'd'd(e(e(f)f*g*h+i-k/m 1p 4s 8w<|AFL.l8wBLU]bhuѝT1p%aWNE=} 6v 0n*h%a!\WSPNMLLMOQSUWZ!\#_%a&c(e)f*g+i,j,j! "$&(*,. 0 2 4 6 8 :<>@CEGIKMPRTWY!\"^$`&c(e*g,i-l/m 0o 2q 3r 4s 5t 5u 5u 5u 5t 4s 3r 1p/n-k*h(e%a"]YUPLGC-k 0n 3r 5u 8x;{>~ADGJNS"]/mHqС胷W:z*h#^YVSQOLJGDA>~;{ 8x 6u 3r 0oCJPU ["] [XURNG>} 4s-l+i+i/m 4s ;z@*hD-k"^(eR< 9!]X$`![%a'c%b#^ZUSRTW!]&c,j 2q 8x>}BFHIGE#^#_$`%a%b&c'c'd'd(e(e)f)g*g+h,j-k/m 1p 4s 7w ;{@EJ,j5t>~GOV[atѯM-k"]SKC ;{ 4s.l)f$` [VRPNLLMNOQSVX Z"]#_%a'c(e)g*h+i,j,j! "$&(*,. 0 2 4 6 8 :<>@BDFHJLOQSVX Z"]$`&b(e*g,i-l /n 1p 3r 5t 6u 7w 8x 9x 9y 9y 9x 8w 6v 5t 2r 0o-l+h(e$`!\XSNIE@ 4s 7v :y=}@CFIMR!\/nP\9y)f"]XUSQNLIFCA>} ;z 8w 5t 2q 0nCIOTX ZXVRNHA 8x 1p,j*h+i/m 4s ;z@0o ;z+i [#_U :<$`$_"^&c(e&c#_ ZVSRTX"]'d-k 3r 9y?~CFHIGD#_$`$a%a&b&c'd'd(e(e)f)f)g*h+i,j-k/m 1p 3s 7v :z?~CH*h2q;zCJPT\wE)fYPH@ 9y 2q-k'd#^YUROMLLMNOQTVY ["]$`%b'd(f)g+h+i,j,k!!#%')+-/ 0 2 4 6 8 :;=?ACEGIKMOQTVY!["^$`&c)f+h-k/n 1p 3r 5u 7w 9x :z ;{<|=|=|<|<{ :z 8x 6u 3r 0o-l*h'c#_ ZUPKFA 8x ;{>~AEHKPY-lQU4s&c![WUSPNKHFC@=} :z 7w 4t 2p/mBHMRVWVTOJD<| 5t/m+h)g*h.m 4s :z?(fd 5u)fW(fW 8@&bj"^$`(e)f'd#_ ZVSSUY#^(e.l 4s :z?DGIIGD#_$`%a%b&c'c'd(e(e(f)f)g*g*h+i,j-k/m 1p 3r 6v :y>}BF(e0n7w>~EJNY~=}%aVME>~ 7v 1o+i&c"]XTQNMLLMNPRTWY!\#^$`&b'd)f*g+i,j,j-k! 
"$&')+-/ 0 2 4 5 7 9 :<>?ACEGIKMOQTVY!\#^%a'd)g+i.l 0o 3r 5t 7w 9y ;{=|>~?@@A@>~<| 9y 6v 3r 0o,k)f%a!\WRMGB=|@CFJNV*gKE-k#_ZWTRPMJHEB?<| 9y 6v 3s 1p.lAGLQTUTQMG@ 9y 2q-k)g(f*g.l 3r 9y>~TT 2q&cS 5uV 7F'd<|!\&b)g*g'd$_ ZVSSUY#_)f/m 5t ;{@EHIJGD$_$`%a&b&c'd'd(e(f)f)f)g*h*h+i,j-k/m 1o 3r 6u 9x<|@ Z&c-k4s:z@DJXÌ5u!]RJC<{ 5t/n*g%a!\WSPNLLLMNPRUW Z!\#_%a&c(e)f*h+i,j,j-k"!#$&(*+-/ 0 2 3 5 6 8 9 ;<>?ACDFHJLOQSVY![#^%a'd)g,j.m 1p 4s 6v 9x ;{=}?ABCDEEC@=} :y 6v 3r/m+i'd#^YTNICADHLS%a@`6u'c!\XVTQOLIGDA>~ ;{ 8x 5u 2r 0n :z@FKORSROJD>} 7v 0o+i(e(e)g-k 3r 8x=|E=}/m#_OR 8N&c)f!\'d*h*h(e$` ZVSSV Z$`)g /n 6u<|AFHJJGC$`%a%b&c'c'd(e(e)f)f)g*g*h+h+i,j-l/m 0o 3r 5t 8x;{?Y%a+h1p7v<{@EXãx.mXOH@ :y 3r-l(e$_ ZVROMLLLMOQSVX ["]$_%b'd(e)g*h+i,j-k-k"  !#%'(*,-/ 0 1 3 4 5 6 8 9 :<=?@BCEGIKNPSUX ["^$a'd)g,j/m 2p 4t 7w :y<|?ACDFHJKJFA=} 9x 5t 1p-k(f$` [UOJDFJO Z3smA+h#^ZWUSPNKHEC@=| :y 7v 4s 1p/m :y?EIMPQPMHB;{ 5t.m*g'd'c(f,j 2q 7v ;z<| 6u,j![KK<W%a%b#_)f,j+i(e$` ZVSTV [%a*h 0o 7v=}CFIKJGC$`%a&b&c'd(e(e)f)f)g*g*h*h+i+i,j-l/m 0o 2q 5t 7w :z=}W#^)f.l3r7w;{B\c(fULE>~ 8w 1p,j'd"^YUQNMLLLMOQTVY!["^$`&b'd)f*g+i,j,j-k-k#  "$%')*,-. / 1 2 3 4 5 6 7 8 9 ;<=?ABDFHJMOQTWZ"]$`&c)f,j/m 2q 5t 8w ;z>}@CEGHKOTUPHA;{ 7w 3r.m*g%b!\VPJELT)fNM/m$` [XVTROMJGDA>~;{ 8x 6u 3r 0o.l 9x>~CHLNOMJF@ 9y 3r-k)f&c&b(e+i 1o 5u 8x 8w 2q(fWG+iAE#_#_"^%a+i-k,j(e$_ZVTTW!\&b+i 2p 8x>~DGJKJGC$a%b&c'c'd(e(f)f)g*g*h*h+h+i,j,j-l/m 0o 2q 4s 7v 9y<|U!\&c+i0o4s8w@cP#_QIC<| 6u 0n*h&b!\XSPNLLLLNPRTWY!\#^%a&c(e)f*h+i,j,k-k-k#!!#$&')*,-./ 0 1 2 3 3 4 5 6 7 8 9 ;<>?ACEGIKNPSVY!\#_&b)f+i.m 2q 5t 8x<{?BEGIKNT"]&c"^QE>~ 9y 4t 0n+i&c"]WQKE!\7wxU2q%b!\YWUSPNKHEC@=| :z 7w 4t 2q/n-k 8w=}BFJLLKHC=} 7v 1o+i'd%a%a'd*h/n 4s 6u 4sSDS#^"] 9P*g![Z'd-k.m,j(e#_ZUTTX"]&c,j 3r 9y@EHJLJF$`%a&b&c'd(e(e)f)g*g*g*h+h+i+i,j,k-l.m 0o 2p 4s 6u 8x :zT Z$a)f-k0o4t@o@YNG@ :y 4s.l)f$` [VROMLKLMNPRUX Z"]#_%a'c(e)g*h+i,j-k-k-k$! 
"#%&()*+,-./ 0 0 1 2 2 3 4 5 6 7 8 9 ;<>@BCFHJLORUX [#^%a(e+i.l 1p 5t 8x<|@DGJLNPY+i8w5t$_LA ;{ 6u 1p,j'd"^WQKEJW3r&c!\ZXVTQOLIGDA>~ ;{ 8x 6u 3r 0o.l+i 7v<|AEHJJIFA ;{ 5t/m*g&b$`$`&b)g.l 1p 3r!\XCOYQ 6 Z>}X!\)f 1o/n,j(e#_YUTUX"^'d-l 4s ;zAFIKMJF$`%a&b'c'd(e)f)f)g*g*h+h+i+i+i,j-k-l.m 0n 1p 3r 5t 7vLRX#^'c*h-l2q@3rTKD>~ 8w 2q,j'd#_ZUQOMLKLMOQSVX ["]$`&b'd(f*g+i,j,j-k-k-l$"!"$%'()*+,--../ / 0 0 1 2 3 3 4 6 7 8 : ;=?ACEGIKNQTWZ"]$a'd*h.l 1p 5t 9x=}BGKNPQT"]6uTV6uUD=| 7w 2q-k(e#^XQKEQ2q&b!\ZXVTRPMJGEB?<| 9y 7v 4s 1p/m,j*h 6u ;z?CFHHFC>~ 9x 3r-k(e$a#^#^%a(e,j/n 0o'd"]BKRE 9$`/nT#_+i 7w 0o,j(e#^YUTUY#_(f/m 5u<|CGJLMJE%a%b&c'd(e(e)f)g*g*h+h+i+i+i,j,j-k-l.m /n 1p 2q 4s 6uJPV!\$a(e+h0nB)gPHB<{ 6u 0o+i&c"]XTPNLKKLMOQTVY!\#^$`&c(e)f*h+i,j-k-k-k-l%"  !#$&'()*+,,---...// 0 0 1 2 3 4 6 7 9 :<>@BDFHKMPSVY!\$`'c*g-k 1o 4t 9x>~DKQVXWX%bB~֓V%aH>~ 8x 3r-k(e#^XQJDuF/m%a!\ZXWUSQNKHEC@=} :z 8w 5t 2q 0n-l+i)f 5t 9y>~BDFFDA<| 6v 1o+i&c#_"]"]#_&c*h,k-k-k$`@AJ<>'d%bP%b 1pA 0o,j'd"^XUTV Z$`)g 0n 7v>~DIKMNJE%a&b'c'd(e)f)g*g*h*h+i+i+i,j,j,j-k-l.m/n 0o 2q 3r 5tIOTY#^%b(e.mF`"]MF@ :y 4s.m)g%a!\WSOMLKKLNPRTWZ!\#_%a&c(e)g*h+i,j-k-k-l-l&#  "$%&()**++,,,,,,---../ 0 1 2 4 5 7 9 :<>@BDFHKMORUX![#_&b)f,j 0o 4s 9y?GQ [&b'd%a#^)gQ0oL? 9x 3r-l(e#^WPJCtT9y*h#_ [YXWVTRPMIFC@>} ;{ 8x 6u 3r 1o.m,j*g'd 4s 8x<|@BDCA>~ 9y 4s.m)g%a"] [ ["^%a(e*g [ 5u$_=DC 7D%aIY(eAL 0o,j'd"]XUTW [%a+h 1p 9x@FJLNNID%b&c'd(e(f)f*g*h*h+i+i+i,j,j,j,j-k-l.l/n 0o 1p 2q 4sHMSW![#_&c-lLDVIC=} 8w 2q-k(e#_ ZUROMKKKLNPSUX Z"]$_%b'd(f*g+i,j,j-k-k-l-l'$! !#$&'()**+++++++++++,--. 0 1 2 4 6 7 9 ;=?ACEGIKMORUX [#^%b(f,j 0n 4s 9y@KY,j7v9x2q*h.mb>~N@ 9x 3r-l(e"^WPI;{.m&b!\YXXXWVUROLHDA>~;{ 9x 6v 4s 1p/m,k*h(e&c 3r 7v ;z>~@AA?<{ 7v 2q,k'd#_ [YZ!\#_&b'd!\bY<I= 6G"]O!\*h/mP 0o+i&c!\WUUW!\&b,j 3r :zBHKMPNIC&b&c'd(e)f)g*h*h+i+i+i,j,j,j,j-k-k-l.l/m 0n 1o 2qBGLQUY!\$`-kS1pPGA;{ 6u 0o+i&c"^YTQNLKKKMOQSVY ["^$`&b'd)f*h+i,j-k-k-l-l-l'%" "$%&())********)))***+,-. 
0 1 3 5 7 9 ;<>@BDFHJLNPRUW Z"^%a(e+i/n 3s 9yAO(eAX[I6u5uvJP@ 9x 3r-k'd"]VOH"]YXWWWXXXWTPKGC?<| 9y 7v 4s 2q/n-k+i)f'c%a 1p 5u 9y<|>~?>~<| 9y 5t 0n*h&b"]YXXZ!]#_$`!\ 3rM>O 7 6IER#_ 0oJ 0n+h%b![WUUX"]'d-l 5t<|DJMOQNHC&b'd(e(f)f*g*h+i+i+i,j,j,j,j,k-k-k-l.l.m/n 0o 1pAFKOSW Z#^-l\}%aKD?~ 9y 4s/m*g%a!\WSPMKKKLMOQTWY!\#_%a&c(e)g*h+i,j-k-k-l.l.l(%#  !#$&'()******)))((((()*+,-/ 1 2 4 6 9 ;=?ACEGHJLMOQSUX Z"]%a(e+i/m 3r 9yCU5udʖߛqG>~QP@ 8x 2q,j&c!\TMFVVVWX Z!\"^!]YSMFA=} :y 7v 4t 2q 0n-l+i)g'd%a,j 0o 4s 7w :z<|=|<| :y 6v 2q-l(f$` [WVVX Z!\!\X%b@DU 6 8IHU&bD@/m*g%a [VUVY#_(e/m 7v?FKNPRNG%b&c'd(e)f)g*h+h+i,i,j,j,j,j,k-k-k-k-k.l.m/m/n 0oAEINQUX"].leOWHB=| 7w 2q-k(e$` ZVROLKKKLNPRUW Z"]#_%b'd(f*g+i,j,k-k-l.l.l.l)&#!!"$%'())*****))(('''''()*+-. 0 2 5 7 9;>@BDFHJKMNOQRTVX Z"]$a'd+h.m 3r 9yD [H^HON? 8w 1p+i%b ZSLETUW [$`(e+i*h&cZPG@ ;{ 8w 5t 2q 0o.l,j*g(e&b$`+i/m 3r 6u 8x :z :z 9y 7w 4s 0n+i&c"^YVTTUWYXPP ;M 8 5 8@KY(eD 7w.m)f$_ZVUV Z$`*g 1o 9xBIMORSMF&b'c(e(f)g*g+h+i+i,j,j,j,k-k-k-k-k-k-k-l.l.m/m/n@DHLPSV![/nm4tOE@ ;z 5u 0o+i'c"^YTQNLKJKLNPSUX ["^$`&b'd)f*h+i,j-k-k-l.l.l-l*'$" !#%&'))*****))(''&&&&&'()+,. 0 3 5 8 :=?BDFHJLMOPQRSTUWY ["]$a'd*h.m 3r 9yE%abvSEK>} 7v 0o*h$`YQJCU Z%a-l7v=}<|4s'dUH? 9y 5u 3r 0o.l,j*g(e&c$`"^*h.l 1p 4s 6v 8w 8w 7v 5t 2p-l)f%a![WTRRSUUTHA?W 6 5 9BO!\+i 3r-k(e#^YVUW![%a+i 3r<{DLOQTSLE&c'd(e)f*g*h+i+i,j,j,k-k-k-k-k-k-k-k-k-k-l.l.l.m?CGKNQT Z0oss%aJC>} 9x 3s.m*g%a!\WSPMKJJKMOQSVY!\#^%a&c(e)g*h+i,j-k-l.l.l.l-l+(%# !"$&'()*+++**)(''&%%%%%&')*,. 1 3 6 8 ;>ACFHKMOPRSTTUVVWXZ!["^%a'd*h.l 3r 9yG+i\6vH<| 5u/m)f#^WOHW$`/n@Tb`M5t!\J>~ 7w 3r 0o.m,j*h(e&c$a#^!\)f,j /n 2q 4t 5u 6u 4t 2q/n+i'd#^YURPPQRRPA ;I$_ 5 6;FR#_ 3rH 1p,j'c"]XUUX!]&c-k 5t?~HNQSVTKD'c'd(f)g*h+h+i,j,j-k-k-k-k-k-k-k-k-k-k-k-k-l-l ;z>~BFILORZ1ptFUF@<{ 7v 2q-k(e$` ZVROLKJJKMORTWZ!\#_%a'd(f*g+i,j,k-k-l.l.l.l-l-)&#! "#%'()*+++++*)('&%%$$$%%'(*,. 
1 4 6 9<?BEHKMPRSUVWXXXXYYZ [!\#^%a'd*h.l 3r 9yH2qc)fD ;z 4s-l'd!\TMF,jBfʑݮxI(eL>} 6u 1p.m,j*h(f'c%a#_!\ Z(e+i.l 0o 2q 3r 3r 2q 0n-k)f%a!\WSPNNNONL=>S 0o 4 7>IV%bFK 0n+i%b!\WUVY#^(e/n 8xBKQSUXSJC'd(e)f*g*h+i,j,j-k-k-k-k-l-k-k-k-k-k-k-k-k-k-k :z>}AEHKNQY2qp}-kMC>~ :y 5t 0n+i'c"^YTQNLJJKLNPRUX ["]$`&b'd)f*h+i,j-k-k.l.l.l.l-l.*'$" !"$&()*+,,,,+*)('&%$##$$%&(*,/ 1 4 7 :=@DGJMPRUWXZ [![!\!\!\!\![![!\!\"]#_%a'd*h.l 3r :yJ;ziXA 9x 2q+i%aYRJCdɭc/nN<| 4s/n,k*h)f'd%a#_"] [X'c*g,j/m 0o 1p 1p 0n-l*h'c#^YUQNLLLLKH=E!\ 5 5 8ALY'd(e 7w.m)g$` ZVUV Z$`*g 1p;{FOTUWYRH&c'd(f)g*h+i,i,j-k-k-l-l.l.l-l-l-k-k-k-k-k,k,j,j :y=}@DGJLOX1pg˰K ZGA<| 8w 3r.l)g%a!\WSOMKJJKLNPSVY![#^$a&c(e)g*h+i,j-k-l.l.l.l.l-k/,(%#! "#%')*+,---,+*)('%$####$%&(*,/ 2 4 7+-/ 2 4KNQTWY!\"]#_$`$`$`$`$`#_#^#^"^"^#_$`%b(e+h.m 3r :zMEsoRM>~ 6v 0n)g#_WOH}4tN ;z 2p-k+h)f'd%b$`"] [YW%b(e+h-k.m/m.m-k+i(e%a!\WRNLJIIIGD@M%b 4 5 :DO!\*g:z4t 1p-k(e#^YVUW [%a,j 4t?KTWW Z [QG'd(e)f*g+h+i,j-k-k-l.l.l.l.l.l-l-l-k-k-k,j,j,j,i 9y<|@CFHKNW0nYb/nOD?~ :z 6u 1p,j(e$_ ZVRNLJJJKMOQTWZ!\#_%a'd(f*g+i,j-k-k.l.l.l.l.l-k 1-*'$" !#$&(*+,-...-,+)(&%$####$%&(*,$&), 0 3 6 9<>@AAA!["^$`&b'c'd(e(e'd'd&c&b%b%a$`$`%a&c(e+i.m 3r ;{PQ^x/mF ;{ 4s-l'd!\TLE5tL 8x/n+i)f'd&b$`#^!\ZWU$`'d)f+i,j-k,j+i)f&b"^YTPLJHGGFDADT 5 4 6=GS#^-k,j@ /n+i&c"]XUUX"]'c.l 8xDPXZY"] [OE'd(e)g*h+i,j,j-k-l.l.l.l.l.l.l.l-l-k-k,j,j,j+i 5u 9x<|?BEGIMV-kJX>~!\HA=| 8x 4s/n+h&c"^YTPMKJJJKMORUX Z"]$`&b'd)f*h+i,j-k-l.l.l.m.l.l-k 2.+(%#! "#%')+,..///.-+*('%$#""##%&("$'+/ 3 7 9<>ADGJLMMKI'd)f*h+i+i,j,j,j,j+h)g(e'c&b&b'c(f+i/m 4s<|T_LyWA 8x 1p+h$`XPIB0oG 5t-k*g(e&c%a#_!] 
ZXVT#^%b'd)f*h*h*g(f&c$_ [VRNJHFEDCA> : 6 4 4 8?JV%a 0nK 3r.l*g%a [WUVY#^)f 1p<|JV!\!\!\$` ZLD(e)f*g+h+i,j-k-l.l.m.m.m.m.m.l.l-l-k-k,j,j+i+i 5u 8x ;{>~ADFHLU)f;{=})gPD?~ ;z 6v 2q-l)f%a!\WSOLKJJJLNPSVX![#^$a&c(e)g+h,i,j-k.l.l.m.m.l-l-k 4 0,)&$"  !#$&(*,./ 0 0 0 0/.,*)'%$#""#$%!!$(- 1 4 4 4 3 3 3 5 7 ;@FMSVVTP,j.l/n 1p 2q 4s 5t 5t 3r 0o-k*g'd'c'd)f+i/n 4s>}XnlA:zJ=| 5u.m(e"]UMEo'cA 2q+i(e'c%a$_"]![YWUZ"]$`&b'd(e(e(e&c$`!\XTPLHFDCB@> ; 8 5 3 5 :BMX&c 0o 7w/n,j(e#_ZVUV Z$`+i 5tBQ!\$`#_$`&cYJ'd(e)g*h+i,j-k-l.l.m.m/m/m/m.m.m.l-l-k,k,j+i+i*h 5t 8w ;z>}@CEGKS%a/m+iWHA=| 9x 5t 0o,j'd#_ ZUQNLJIJKLNQTVY!\#_%a'd(f*g+i,j-k-k.l.l.m.m.l-l,k 5 1.*'%#!  !"#%')+-/ 0 1 1 1 1 0.-+)'&$####$!!$*/ 2 1-)'&'),. 0 2 5 9@JU!\"^ [U 4s 7v ;z@DGEA :y 3r-k)f'd'd)f,j 0n 5t?"^{F=}p [B 9y 2q+i%aYQIBMU :z.m)g'd&b$`#^!\ ZXVSX ["^$`%b&c&c%b$`"]ZVQMIFDBA?>; 9 6 4 3 6<EP ['d/n 0o.l*h&b"]XUUW!\&b.l 9yIX&c'd%a'd(eVH'd)f*g+h,i,j-k.l.m/m/m/m/m/m/m.m.l-l-k,j,j+i*h*g 5t 7w :z=}?BDFIP [$a ZMD>~ ;z 7v 3r.m*h&b"]XTPMKJIJKMORTW Z"]$`&b'd)f*h+i,j-k-l.l.m.m.m.l-k,j 7 3/,)&$"!   !#$&(*,. 0 1 2 3 3 2 1 /.,*(&%####"!$+ 1 2.'"!#%(, 0 4 7BBA><?JX&b(e%b#^IQ!\(f*h%aTE 8x/n*h(e(e)g,j 0o 6uB'de2q>~ܐ5uK=} 6u/m(e"]UMF/nF 4s+i(e&c%a$_"^!\YWURVY!\"^#_$`$`#_"] ZWSOKGDB@?= ; 9 6 4 3 4 8?HS"](e-k.l,j(f$` [WUUX"](e 1p?Q%a+i*h'd+i(eSF(e)f*h+i,j-k.l.m/m/n/n/n/n/n/m.m.l-l-k,j+i+i*h 2q 4t 7v :y<|?~ACEHNUVPF@<| 9x 5t 1p-k(f$` [VROLJIIJKMPRUX ["^$a&c(e)g+h,j,k-k.l.m.m/m.m.l-k,j 9 5 1-*'%#"!!!"#%')+. 0 1 3 4 4 4 3 2 1/-+('%$###!$+ 2 3, !! "'. 
7?DHJJYXSKCBN$`/m 3r 6v@FZh`F+iO<| 0o*h(e(e*g-k 1p 7wE+i;{)g@nJWB 9y 2q+i%aYQIBBT ;z/m)g'd&b%a#_"] [YVTQUWZ!\"]"]"]!\ ZWTPLIEB@>= ; 9 7 5 3 3 5 :AKU#_)f,j,j*h&c"^YVUVZ$`+h 6uG Z,j0o-k*h/m'cOD(f*g+h,j,k-l.l/m/n/n 0n 0n /n/n/m.m.l-l-k,j+i*h*g 1p 4s 7v 9y<{>~@BDFKONHB>~ ;z 7v 3r/n+i'c#^YUQNKJIIJLNQSVY!\#_%a'd(f*g+i,j-k-l.l.m/m/m.m.l-k,i ; 7 3/,)&$#"""#$&(*,/ 1 3 4 5 6 6 5 4 2 0.,)'&%$'##* 3 6-"$$!% 1?KT Z"^#_F'd*h,j*h$`THH"] 8wL#_n7wR;{/n)f'd(e*g-l 2q 9xH.mqЭ爺G)f&c>~I(eH<| 5u.m(e"]TME> [A 2q+i(e'c&b$`#_"] ZXUSPSUXY Z [ ZYWTQNJFC@>< ; 9 7 5 3 2 3 6<DMW$`)f+i*h(e$a!\WUUW![&b.m<|P(f4t5u/m/m2q$`L(e)f*h+i,j-k.l.m/n /n 0n 0o 0o 0n /n/n/m.l-l-k,j+i*h)f 1p 4s 6v 9x ;{=}?ACEHJHD?<| 9x 5t 1p-k)f%a!\WSOLJIIIKLOQTW Z"]$`&b'd)f*h+i,j-k.l.m/m/m/m.m-l,j+i= 9 5 1-*'%$###$%')+- 0 2 4 5 7 7 7 6 5 4 1/-*('%%&#( 3 9 2$*-' ->L Z[ 8<?CGKO 6u 8x 2q)fVM#_I;{w6uM 7w,j(e'c(e*h.l 3r ;zK/n[jJ,j![&b6u-kO@ 8x 1p+h$`XPHAB 5t-k)g(e'c&b$a#_"] ZXUROQSUWXXWVTRNKGDA>< : 9 7 5 4 2 2 4 8>FPY$`(e*g)f&c#^ZVUUX"](e 3rD!\3r=}9y2q5t4s [I(e)g+h,j-k-l.m/m /n 0o 0o 0o 0o 0o 0n/n/m.l-k,k,i+h*g)f 1p 4s 6u 8x :z<|>~@ACEFD@=} :z 7v 3r /n+i'd#_ ZVRNKJIIJKMPRUX ["^$a&c(e)g+h,j-k-k.l.m/m/n/m.l-k,j+i? ; 6 2/,)'%$#$$&'),. 1 3 5 7 8 9 9 8 7 5 3 1.,)(&+%&/ ; 9& 0 8 5$$ 3AM/ 2 6 9>BFJMQUYS 6u)g![.m,kg(eB 1p)g&c&c(e+i/m 4s=|N-kC@-k [W%a)fUD ;z 4s-k'c [SKD=.l+h)f(e'd&c%a#_"] ZWTQNOQSTUUUSQOLHEB?= ; 9 7 6 4 3 2 2 5 9@HQ Z$`'d(e'c$`!\XUTVY$`+i 9yO,j@F=|5u<{2qVF)f*g+i,j-k.l/m/n 0o 0o 0o 1o 0o 0o 0n/n/m.l-k,j+i*h)g/m 1p 3r 6u 8w :z<|>}?@BCCA>~ ;{ 8x 5t 1p.l*g&b"]XTPMKIHIJLNPSVY!\#_%a'd(f*g+i,j-k.l.m/m/n/n/m.l-k,j+iB= 9 4 1-*(&%$$%&(*,/ 2 4 6 8 : : : : 9 7 5 2 0-+)'+&* 8@ 6 1@E 3& 3)*- 0 4 9=BFJNQTW [E 8w 1oEc>~O 7v+i&c%a&b(e+i 0n 6u?O'd0n)g ZSU!\VG=} 6v 0n)f#^VNF?+i*g)f(f(e'c%b$`"] ZWTPKNOQRSSRPNLIFC@= ; 9 7 6 4 3 2 2 3 6 ;BKS [$`&c&c%a"]YVTTW![&c 0oA!];{ON?<{B.lPD)g*h,j-k.l.m/n 0o 0o 1o 1p 1p 1o 0o 0o/n/m.l-k,j+i*g)f.m 1p 3r 5u 7w 9y ;{=|>~?@A@>~<| 9y 7v 3r 0n,j(e$` [WROLJIHIJLOQTW Z"]$`&b'd)f*h+i,j-k.l.m/n/n/n/m.l,k+i+iD? 
; 6 2/,)'&%%&')+- 0 2 5 7 : ;<<< ; 9 7 4 1/,*)+' 0AB->"^E*) 9H(+. 3 9?DIMQSUWY!\%bGFE&cQY<|.l'd%a$a&b)f,j 1p 7wAO!\#^XRPSSJ@ 8x 2q+i%aYQIB*h*h*g)g)f'd&c$`"] ZWSPJLMOPPPONLIFDA>; 9 7 6 4 3 2 1 2 4 7=DLT [#_%a%a#^ [WUTUX"])f 6uM/nO_TBDE(eL)f*g+i,j-k.m/n 0n 0o 1p 1p 1p 1p 1p 1o 0o /n/m.l-k,j+h*g(f.m 1o 3r 5t 7v 9x ;z<|=}>~??>~<| :z 8w 5t 1p.l*h&c#^YUQMKIHHIKMORUX [#^%a&c(e)g+i,j-k-l.m/m/n 0n/n.m-l,j*h+hFB= 9 4 1-+)'&&&()+. 0 3 6 9 ;<>>>< ; 8 6 3 0., 4,* 7HB 6J<#)?W&)- 3 :AHNSWYZ Z [!\#_'dSV9xT"^@ 0n(e$`#_$`&c)g-l 2q 9yBMTSPMNOJA :z 4s-k'd!\SLD=+i+i+h*g(f'c%a"^ ZVROHJKLMMMLKIGDA?< : 8 6 4 3 2 1 1 2 5 9?FNU [#^#_#^!\XVTTVZ$`-l?~!\CfmXGOC"]H)f*h,i-k.l/m 0n 0o 1p 1p 1p 1p 1p 1p 1p 0o /n/m.l-k,i*h)g(e.m 0o 2r 5t 7v 8x :z ;{<|=}=}=}<| :z 8x 6u 3r 0n,j(e%a!\WSOLJHHHJKNPSVY!\#_%b'd)f*h+i,j-k.l.m/n 0n 0n/n.m-k,j*h*hID? ; 7 3/,*('''(*,. 1 4 7 :<>?@??= ; 8 5 2 /- 6-,>MB>QK 2#(?+i$', 3 ;DMT Z#_%a%a$`#_#^#^$a*gcYY7vF Z@ 1o(f$`#^#^$`'c*h/m 4s ;zCJMMLKKIB;{ 5t/m(f"^VNF?-k-k,j+i)g'd%a"^ ZVRNFHIJKKJJHFDB?< : 8 6 5 3 2 1 1 1 3 6 :AHOU Z!]"]!\YVTSTW!\'d 3rK2q_ǀyZPZ<|UE*g+i,j-l.m/n 0o 1p 1p 2q 2q 2q 2q 1p 1p 0o /n/m.l,k+i*h)f,j.m 0o 2q 4s 6u 8w 9y :z;{<|<|<{ :z 9x 7v 4s 1p.l*h'c#_ZUQNKIHHIJLNQTW Z"]$`&b(e)g*h,i,k-l.l/m 0n 0o 0o/n.l-k+i*h*hLGB= 9 5 1.+*((()*,/ 2 5 8 :=?AAAA?= : 7 4 1/ 7./DQ 2CNB+ 1+" 9G"%* 2<GQ [&c*h-k-k+i)f&c$`$`%b+i(eSK$`kVs1oR=} 0o)f$`"^"]"^$`'d+i 0o 6u<|CGIIIHGC<| 6v 0o*h$`XPHA :.m.l,j*h(e%b"^ZUQLEFGHHHHGEDA?= ; 8 6 5 3 2 1 1 1 1 4 7<BIOUY [ [YWUSSUX#^+i<|!\Kכဵ]^_2qO)f*h+i-k.l/m 0n 1o 1p 2q 2q 2q 2q 2q 1p 1p 0o/n.m-l,j+i*g(e,j.l 0o 2q 4s 6u 7w 9x :y :z ;{ ;z :z 9y 7w 5t 2q/n,j(f%a!\XSPLJHHHIKMORUX![#^%a&c(e*g+i,j-k.l.m /n 0o 0o 0o/m.l,j*h*h*hOJE@ ; 7 3 0-+))))+-/ 2 5 8 ;>@BCCCA?< 9 6 4 1 : 0 2IU 5DI : 3B ;/!!#( 1<HU%a-k 3r 6v 8x 6v 1p,j(e%a$`&b+hOC 5tI:z3rN [G 9y/n)f$`"]!\!\"^%a(f-k 2p 7w=}BEGGFEB=} 7w 1p+i%b ZRJC< 0o/n-l+i)f%b"^YTOKCDEFFFEDCA?= ; 9 7 5 3 2 1 0 0 1 2 4 8=CJPTWYXWUSRSV Z&b 1pH3sqЭ胶dp[(eJ)g+i,j-l.m/n 0o 1p 2q 2q 2q 2q 2q 2q 2q 1p 0o/n.m-k,j+h)g(e,j.l 0n 
2q 3s 5t 7v 8w 9x 9y :y 9y 9x 7w 6u 3s 1o.l*h'c#_ ZVRNKIHHHIKNPSVY!\#_%b'd)f*h+i,j-k.l/n 0o 1o 1o 0o/m-k,j*g*g*hRMGB> 9 5 2/,+*)*+- / 2 5 9<?ACEEEDB?< 9 6 3= 2 3LY 7CD 5 :(eC!&!!&. :HW(e 2q :zAQ(eP 7v-k(e%a$_%a)gB 0o$`<|DN>~ 5u.l(f$`"] [ Z!\#^&b*g.l 3r 8x=}ADEEDA=} 8w 2q,j&c![TLE> 7 0o.m,j)f%b"]XSNIABCCDCCB@?= ; 9 7 5 4 2 1 0 0 0 1 2 5 9>DJOSVWVUSRRTW"])g 9y ZR탷qЁNZF*h+i-k.l/n 0o 1p 2p 2q 2r 3r 3r 3r 2q 2q 1p 0o/n.m-k,j*h)f*g,j.l 0n 1p 3r 5t 6u 7w 8x 9x 9x 8x 7w 6u 4s 2q/m,j(f%a"]XTPMJHGGHJLNQTW Z"]$`&c(e)g+h,j-k-l.m 0n 1o 1p 1p 0n.m-k*g*g*g*gUPJE@< 7 4 0.,+*+,- 0 2 6 9<?BEFGGFDB?; 8 5A 5 5N#^KB@<=PB#'"#+ 7EU(e 5t@I4s/m 6v,j'c#_"^$`A 4sZW :z#_+i 8w 2q-k(e$`!\ ZYZ!\#_'d+i 0n 5t :y>}ABCB@=| 8x 3r-k'd"]UNF? 8 2q/n,j)f%a!\WQL>?@AAAA@?>< ; 9 7 5 4 2 1 0 0 0 0 1 3 6 :?EJORTUTSRQRUY$`/mF4tىچ=|Q)g+h,j-k.m 0n 1o 1p 2q 3r 3r 3r 3r 3r 2q 2q 1p 0o/n.l-k+i*g(f*g,j-l/n 1p 3r 4s 5u 6v 7w 8w 7w 7v 6u 4t 2q 0o-k*h'c#_ [VRNKIHGGIJMORUY!\#^%a'c(f*g+i,j-k.l/n 0o 1p 2p 1p /n.l,j)g)g*g*gXSMHC> : 6 2 0-,++,. 0 3 6 9=@CFGIIHFDA> ; 8E 8 5M*hP@=? :E ;#*$ ' 1@&. 3rAT-kM 1o)f$`!\!\#^ 7w%bK"]/n 4s 0n,j(e$`!\YXXY!\$`(e,k 1p 6u :z>}@AA?<| 8w 3r.l(e"^VOG@ : 2q/n,j(e$` ZUOJ<=>????>=; : 8 7 5 4 2 1 0 /// 0 1 4 7;@EJNQRSRQQQSV!\(e 7wYXݧz-kJ*g+i-k.l/n 0o 1p 2q 3r 3r 3r 3s 3r 3r 3r 2q 1p 0o/m.l,j+i)g(e*g,i-l/n 1p 2q 4s 5t 6u 6v 7v 6v 6u 4t 3r 1p.m+i(e%a"]XTPMJHGGHIKNPSWZ"]$_&b'd)f*h+i,j-l.m 0n 1p 2q 2q 1p/n-k,i)g)g)g*g![VQKFA< 8 5 1/-,,-. 0 3 6 9=@DFIJKJIGDA= :J< 6J 0oW?@BD< 4# 6.'!"'!-ARTGLZ 5t+h%a!\Y Z"]+iOI.lM"^.l+h'd$`!\YWVWZ"]&b*g.m 3r 7v ;z=}?~?>} ;{ 8w 3r.l)f#_WPHA ; 4/n+i'd#^XRMG ;<<===<; : 9 8 6 5 3 2 1 0//// 0 2 4 8<AFJMOQQPPPRTX#_-lD5t`!\F*h,j-k.m 0n 1o 2q 2q 3r 3s 4s 4s 4s 3r 3r 2q 1p 0o/m-l,j*h)f'd*g+i-k/m 0o 2q 3r 4s 5t 5u 5u 5t 4s 3r 1p/n,k*g&c#_ [VRNKIHGGHJLOQUX ["^$`&c(e)g+h,j-k.l/n 1o 2q 3r 2q 1o/m-k)f)f)f)g)g#_YTOID? ; 7 3 1/---. 0 3 6 9=@DGJKLLKIGD@= :B 7F)f#_FBEGH.B< 4,%(! 
0G#_ 6u 3rTM"]$`H 5u+i%a [WWY.lXC Z(e,j*g'c#_ [XVUUW [#_'d+i 0n 4s 8w ;z<|=}<| :z 7v 3r.l)f#_XQIB; 5.l*g%b!\VPJE 9 : : ; ; : : 9 8 7 6 4 3 2 1 0/.../ 0 2 5 9=AEILNOOOOPRV ['d 6uW\DQ*g+i,k.l/n 0o 1p 2q 3r 3s 4s 4s 4s 4s 3r 3r 2q 1p 0n.m-k,i*h(f(e*g+i-k.m 0o 1p 3r 3s 4s 4t 4t 4s 3r 1p 0n-l+h(e%a!]XTPMJHGGGHJMPSVY!\#_%a'd)f*h+i,j-k.m 0o 2q 3r 3r 2q 0o.l,j)f)f)f)f)g&b"]WRLGB> 9 6 3 0/../ 0 2 5 9<@DGJLNNNLJGC@=H ;@%a 2pO :GJLKHC; 3*" 0#//F%b>~ 9yMGC<| 3r*h$_YUTUX"]EJ 1p-kU(e&b#^ ZWTSSUX!\$a)f-k 1p 5t 8w :z ;z :z 9x 6u 2q.l)f$_XQJC< 6,j(e#_YRLGB 7 8 8 9 9 8 8 7 6 5 4 3 2 0 0/..../ 1 3 6 9=AEHKMMNNOQTX#^,jB4s.mJ*h,j-k.m 0n 1p 2q 3r 3r 4s 4s 4t 4s 4s 3r 3r 2q 1o /n.l-k+i)g(e(e)g+i-k.m 0n 1p 2q 3r 3r 3s 3r 3r 1p 0o.l,i)f&c#_ ZVROKIGGGGIKNQTW Z"]$`&b(e)f*h,i-k.l/n 1p 3r 4s 3r 2q 0n-l+i)f)f)f)f)f(f$` [UPJE@< 8 5 2 0/./ 0 2 5 8<@DGKMOPPOMJFC@P@<Y ZBIMOPNJC : 1(!@( 5-P.m 1p 9x"^ ;{ :y 5t.m'd!\VRPQT#_IA%aHY'c$`"]YVSRQSUY"^&c*h.m 2q 5u 7w 9x 8x 7w 5t 1p-k(f$_YRJC= 7*g%a![UOIC> 6 6 7 7 7 6 6 5 4 3 2 1 0/..--../ 1 3 6 :=AEHJKLMNORU Z&c 4sV_k [E+i,j.l/n 0o 1p 2q 3r 4s 4s 5t 5t 4t 4s 3s 3r 2q 0o/n.l,j+h)f'd(e)g+i,j.l/n 0o 1p 2q 2q 2q 2q 1p 0o.m,j*g'd$`!\XTPMJHGFGHJLORUX [#^%a&c(e*g+i,j-k.m 0o 2q 4s 4t 3s 1p/m-k(f(f(f)f)f)f+i'd#_YSNIC? ; 7 4 1 0/ / 0 2 5 8;?CGKNPQRQOMJFCWH<MH(fLJNRSSQKC 9/&Y 5 :GXTF$_OP.l,j(e"^WRNMMP"^L=U+i"]%a#^ [WTQPPQSV [$`(e,j/n 3r 5t 6v 7v 6u 3s 0o,k(e#_YRKD= 7 1"]WQJE? 4 4 5 5 5 5 4 4 3 2 1 0 0/.-----. / 1 4 7 :>ADGIJKLNPSW"^+iA4sDO*h+i-k.m 0n 1p 2q 3r 4s 4s 5t 5t 5t 5t 4s 3s 2r 1p 0o/m-l,j*h(f&c(e)f+h,j-l/m 0n 1o 1p 1p 1p 1p 0n.m-k+h(e%b"^ ZVRNKIGFFGHJMPSVY!\#_%b'd)f*h+i,j.l/n 1p 4s 5t 5t 3r 1p.m,j(e(e(e(f(f)f.m*h&b"]WRLGB= 9 6 3 1 0 0 1 2 4 7 ;?CGJNQSSSRPMIFCQAC(eEYCNSVWYVKB 8.% 4sS 5IF, 4O-kT 1o"] ZT&c'cVHIL ZL ;K 3s#_K!\YVRPNNOQTX"]%b)g-k 0o 3r 4s 4t 4s 2q/n,i'd#_XQKD= 7 1XRLF@< 2 3 3 3 3 3 2 2 1 0 0/.---,,--. 
0 2 4 7 :>ADFHJKLNQU Z&b 3rTa+iH+h,j.l/n 0o 2p 3r 3s 4s 5t 5t 5t 5t 5t 4s 3r 2q 1p 0o.m-k+i*g(e&b'd)f*h,j-k.l/n 0n 0o 0o 0o /n.m-k+i)f&c$_!\XTPMJHFFFGIKNQTW Z"]$`&c(e)g+h,j-k.m 0o 3r 5t 6u 5t 3r 0o-l+i(e(e(e(e(e(f 2q-k)f$a [UPJE@< 8 5 3 1 1 1 2 4 7 :>BFJNQTUUTRPMIF ZJ?Wt,iONSWZ#^ 8w&cKA 7-%@Q 4 4 2,@P :z@ 0oK-k9y 8x!\AHLJ :C'd$`LYVSPNLLMORV Z#_'d*h.l 0o 2q 2q 2q 0o.l*h'c"^XQJD= 7 2SMGA< 8 1 1 1 1 1 1 1 0 0/..-,,,,,,-. 0 2 5 8 ;>ACFHIKMOSW"^+i@3reV*g+i-k.m 0n 1p 2q 3r 4s 5t 5t 5u 5u 5t 5t 4s 3r 2q 1p 0n.l,j+h)f'd&b'd)f*h+i,k.l.m/m/n/n/m.l-k+i)g'd%a"]YURNKIGFFFHJLORUY!\#^%a'd(f*g+i,j-l/n 2q 5t 7v 7v 5t 2q/n,k(e(e(e(e(e(e(e 5t 0o,j'd#_YSNIC? ; 7 5 3 2 2 2 4 6 9=AEIMQUWWVUSPLIFUDI 1p?&cHRW Z"]@w 3rKA 7-% 9xU 8$&-D(eY-kQ*h 3r>} ;z.mN 6IF 8>#^#_KVTQPQJJKLOSW!\%a(e+i.l /n 0o 0o/m,j)g&b"]WPJD= 7 2MGA< 8 4/ 0 0 0 0//..--,,,+++,,-/ 0 3 5 8 ;>ACEGIKNQU Z&b 3rSb:yK*h,j-l/m 0o 2p 3r 4s 4t 5t 5u 6u 6u 5u 5t 4s 3r 2q 1o/n.l,j*h(e&c&b'd(f*g+i,j-k.l.l.m.m.l-k+i*g(e%b#^ [WSPLJGFEFGHKMPSWZ"]$`&b'd)f*h,i-k.m 1o 4s 7v 8w 7w 5t 1p.m,j(e(e(e(e(e(e(e 8x 4s/n+h&c"]WRLGB> : 7 4 3 2 2 4 5 8<?DHLPUZ ZXWVSPLI#_OE%a#_J%aRUZ"]'cw 6uK@ 6-%  [ 5( 8&G@MHNR Z$aR 8EB 5 ;Z!\EMQFRUNHIJMQUZ#^&c)f,i-l.l.l-k+h(e$a![VPIC= 7 1,A< 8 4 1......---,,++++++,,./ 1 3 6 8 ;>ACEGJLOSX#^+i?3r#_E+i-k.m 0n 1p 2q 3r 4s 5t 6u 6u 6u 6u 5u 5t 4s 3r 2q 0o/m-k+i)g(e$`%b'd(e)g*h+i,j-k-k-k-k,j+i*g(e&c$`!\YUQNKHGEEFGIKNQUX ["^$a&c(e*g+i,j-l/n 2q 6u 9x 9y 7w 4s 0o-k'd'd'd'd'd'd(e(e<{ 7w 3r.l)g%a![VPKEA= 9 6 4 3 3 4 5 7 :>BFKOS [$`"]YXVSPLI!\KPM&b&cUX!\#^.l*hJ@ 6.&!U 6<H( 6 3 9F0oUGH 5.< 1 9WYENMETYQFFHKOSW!\$`'d)g+i,j,j+i)f&c#_ ZTOHB< 6 1,; 7 3 0.-----,,,+++****++,-. / 1 4 6 9<>ACFHKNQU [&c 3rSbLO*h,j-l/m 0o 2q 3r 4s 5t 5u 6u 6v 6v 6u 5u 5t 4s 3r 1p 0n.m,k+h)f'd$`%b&c(e)f*h+i,i,j,j,j,j+i*g(e&c$`"] ZVSOLIGFEEFHJMOSVY!\#_%b'd)f*h+i-k.m 1p 5t 8x :z :y 7v 3r/n,j'd'd'd'd'd'd'd'd? ;{ 6v 2q-k(e$`ZTNID@< 8 6 4 4 4 5 7 9=@EIMRY(f*h#^ ZXVSPL(fXL/nnF(eWY!\#_,jD1pYI@ 7/(#"]= 9 ;-&$ 6- 4Y 5</- 6. 
9UUEPKEW"]SHDFIMQUY"^%a'd)f*g*g)f'd%a"]XSMGA< 6 1, 5 2 0-++++++++*********+,-. 0 2 4 7 9<?ADFILOSX#_+i?2q*gF+h,j.l /n 1p 2q 3s 4t 5u 6u 6v 7v 6v 6u 5u 4t 3s 2q 1p/n.l,j*h(e&c$`%a&c'd(f)g*h+i+i+i+i*h*g(e'c%a#^ [XTQMJHFEEEGHKNQTW Z"]$`&c(e)g+h,j-l 0n 3r 7w ;z<| :z 6v 2q.m+i'd'd'd'd'd'd'd'dC?~ :z 5u 0o,j'd#^XRMHC? ; 8 6 5 4 5 6 8 ;?CGLPU'd 7w 0o#_ [YVSPL'dUT$`e(eXY!\"^$a 1p)gQH@ 7 0*%.lL 7,'& 5ID 8I 2*/ 0+ ;SOGSMFZ%aVGBDGKOSW [#_%b'd(e(e'd%b#_ [WQLF@ ; 5 0+ 1.,+********)))))))**+,-/ 1 3 5 7 :=?BEGJNRV!\'d 3sSaaS*g+i-k/m 0o 2p 3r 4s 5t 6u 7v 7w 7v 7v 6u 5u 4t 3r 2q 0o/m-k+i)g'd"^$_%a&b'd(e)f)g*h*h*h*g)g(e'd%a#_!\YUROLIGEEEFGILORUX!\#^%a'd(f*g+i,j.m 1p 6u :z=}=} :y 5t 1o-k'd'd'd'd'd'd'd'd'dGB>~ 9y 4s/n+h&b!]WQLFB> : 8 6 5 5 6 7 :=AEJNR!\ 9yN 4s#_![YVSOL&bT&bi`,iWX Z![ [ ZUNG? 8 1,' 5t#_M@ ; 1?QJ 7,- 6+*>QIIUMF"](eWFACEIMQUY!\#_%a&b&b%a$_!]YUPJE? : 4/+.+*))))))))))(())))*+,-. 0 1 4 6 8 ;>@CFILPTY$`,j@2q0oH*h,j.l/n 1p 2q 3s 5t 6u 7v 8w 8w 7w 7v 6u 5t 4s 3r 1p 0n.l,j*h(f&c"]#_$a&b'c'd(e)f)g)g)f)f(e'c%b#_"]ZVSPMJGFEDEFHJMPSWZ"]$`&b'd)f*h,j-k 0n 4s 9y>}?=} 9x 4s/n,j'c'c'c&c&c&c'c'c'cJFB=} 8x 3r.m)g%a [UPJEA= : 7 6 5 6 7 9<?CGLPV,j#^,j 6u#_!\YVSOL%aT/n:z ZTVXXXUQLE? 8 3.*'&%!\K= 9 7 3 5<*&-CMBNXLH$` 0nUD?ADGKOSW Z!]#_$`$`#_"] ZWRNIC> 8 3.**)(((((((((((((()))*+,-/ 0 2 4 7 9<?ADGKNSW"](e 4tT`sU)g+i-k.m 0o 1p 3r 4s 6u 7v 8w 9x 9x 8w 7v 6u 5t 4s 2q 1p/n-l+i*g(e&b"]#_$`%a&c'd'd(e(e(e(e'd&c%a$`"] ZWTQNKHFEDDEGIKNQUX ["^%a&c(e*g+i,j.m 2q 7v=|AA=} 7w 2q.l&c&c&c&c&c&c&c&c&c&cNJFA<| 7w 2q-l(f$`ZTNID@< 9 7 6 6 6 8 :=AEINR [CaF 5u#_!\ZVSOL$`T 0nrRSQSTTSQMIC> 8 3/,*)( 1p'cWOMNP%% 3FF<SYJJ(e 4sRA=?BEIMQTX Z!\"]"]!\ ZXTPLGB< 7 2-)(''('''''''''(((()*+,-. 
0 1 3 6 8 ;=@CFIMQV [%a-lA2q4tH*h,j.l/n 1o 2q 4s 5t 7v 8x :y :z 9y 8x 7v 6u 5t 3r 2q 0o.m-k+h)f'd!["]#^$`%a&b&c'd'd'd'd'c&c%a$`"] [XUROLIGEDDDFGJLPSVY!\#_%b'd)f*h+i-k 0o 5t ;{ACA<| 6u 0o,k&c&c&c&c&c&c&c&c&c&cRNJE@;{ 6v 1p,j'd#^YSMHC?< 9 7 6 6 7 9;?BFKOT'd/n^ 4s#_!\ZVSO/m$`S&c+h|B 7wONOPPOMIEA< 8 4 0.,+++H 3r-k%#") ;E>C!\XEK.l%bK=;>ADGKORUXY Z ZYXURNIE@ ; 6 1,(&''&&&''''''''(())*+,./ 1 3 5 7 9<?BEHLPTY#^)f 6uT^|V)f+i-k.m 0n 1p 3r 5t 6v 9x ;z<|<| ;z 9x 7v 5u 4s 3r 1p 0n.l,j*g(e&b [!\"^#_$`%a&b&c&c&c&c%b%a$_"^![YVSPMJHFDDDEFHKNQTW Z"]$`&c(e)g+h,j.m 2q 9x@EEA :z 4s/m&b&b&b&b&b&b&b&b&b&b&bWRNIE@ ;z 5u 0o+i'c"]WRLGC?< 9 7 7 7 8 :=@DHLQW 2q{m 3r#_!\ZVSO /n%aRT 5uVI*hOAJKKJHEA> : 7 3 1/-,,,-"  % 2?> 4LR?L$`WC 9 :<?BFJMPSUWXXWUROKGB> 9 4/+'&'(&&&&&&&&'''(()*+,-/ 0 2 4 6 9 ;>ADGKNSW!\&c/mB2q5tH*g,i-k/m 0o 2q 4s 6u 8x<{>~?>~<{ 9x 7v 5t 4s 2q 1o/m-k+i)f'd%a [!\"]#^$`$`%a%b%b%b%a$`#_"]![YWTQNKHFEDCDEGILORUY!\#_%a'd(f*g+i-k 0o 6u>}EHF@ 8x 2q-k&b&b&b&b&b%b%b%b%b%b%b#^XSNID? :y 5t /n*h&b!\VQKFB> ; 9 8 7 8 9 ;>AEINRYBp 1p#_!\YVSOK&cSJU-k 1o&cRE<FEDB@= : 7 5 2 0.---#- :< 4=VDI 8JUI : 6 8 ;>AEHKNQSTUUTROLIE@< 7 2.*&&(*%%%%%&&&'''()*+,-. 0 2 3 6 8 :=@CFJMQV [$`+h 7wU[xU)f*h,j.l 0n 1p 3r 5t 8w<|@DDA=| 9x 6v 5t 3r 2p 0o.l,j*h(e&bY Z!\"]"^#_$`$`$`$`$`$_#_"]![YWTROLIGEDCCDFHJMPTW Z"]$`&b(d)f*h,j.m 3r :zCJKF>~ 6u 0o,j%b%b%b%b%b%b%b%a%a%a%a#_XSNID>~ 9y 4s/m*g%a [UPJFA> ; 9 8 7 8 9<?BFJNS!\Rk 1o#_!\YVSOK(eVHES$`$`WK@ 8@?= ; 9 7 4 2 0/- $- 7 9 3 2HHP= 1DG< 4 6 7 :=@CGJMOQRRRQOMJFB> 9 5 1,(%)*+$%%%%%&&''())*,-. 0 1 3 5 7 :<?BEILPUY#_(e 1pD2q1pG)g+i-k/m 0o 2q 4s 7w<{BHKIC=} 9x 6u 4s 2q 1p/n-k+i)f'd%aYZ [!\"]"^#_#_#_#_#_"^"] [YWUROMJHFDCCCEFIKNRUX![#^%a'c(e*g+i-k 1o 7wAJOLE;{ 4s.l%b%b%a%a%a%a%a%a%a%a%a%a-k$`YSNHC>} 8x 3r.l)f$` ZTOJEA> ; 9 8 8 9 :<?CGKOS#^ Z_ 0o#^!\YVROK*h![LACPYYQG> 7 1-* 5 3#"!  !$(/ 5 5/+<OQA 2 4 : 8 2 2 7 6 9<?BEHKMNOOONLJGC?; 7 3/+'$+-.$$$%%%&&'(()*+-. 
/ 1 3 5 7 9<?AEHLOTX"]&c-k :yWWgR(e*h,j.l/n 1p 3r 6u ;zBLTUOE=} 8x 5t 3r 2q 0o.m,j*h(e&c$`XY Z![!\"]"^"^"^"^"]!\ [YWUSPMKHFDCCCDEGJMPSVZ!]$_%b'd)f*h,j/m 4s=}HPRLB 8x 1p,j%a%a%a%a%a%a%a%a%a%a$a$`.m%aYSNHC=} 8w 2q-k(e#_YTNIEA= ; 9 8 8 9 ;=@CGKOT#^WO /n#^![YVROK-k$aSE=AJRRNG@ 9 4 0-*)((),/ 3 4 0+( 4CJ? 2+. 0/ 0 4 : 6 8 ;>ADFIKLMMLKIGD@= 9 5 1-)&"./ 0$$$%%&&''()*+-. / 1 3 5 7 9<>ADGKOSW!\%a*h 3rF1p*gE)f+i-k.m 0o 2q 5t 9yAN!\'d$`TF<| 7w 4t 3r 1p/n-k+i)f'd%aVXY Z [!\!\!]"]!]!\![ ZYWUSQNKIGECCCCDFHKNQUX ["^$a&c(e)g+i-k 1p 9xDPVTJ? 5u/m%a%a%a%a%a%a$a$a$`$`$`$`$`   0o&b ZTNHB=| 7w 2q-k(e#_YSNID@= ; 9 8 8 9 ;=@DGKOT"]I=|/n"^ [YVROKH(e ZMA : ;AFIIFB> : 8 6 5 4 4 4 2/*&&. 8> 8.))*,. 3 9 3 5 8 :=@BEGHIJJIHFDA= : 6 2/+'$! 1 2#$$$%%&''()*+-. / 1 3 5 7 9;>ADGJNRV [$`(e/m<{XROM(e*g+i-k/n 1p 3r 7w?M%a4s9x.lWF ;{ 6u 4s 2q 0o.m,j*h(e&b$`VWXY Z [ [![![ [ [ZYWUSQNLIGEDCBBCEGILOSVY!\#_%b'd)f*h,j/m 5t@MX ZSG;{ 3r-k%a$a$a$`$`$`$`$`$`$`$`$`$`"""J2q'c [TNHB<| 7v 1p,j'd"^XRMHD@= ; 9 8 9 9 ;=@DGKOS Z 9yN*h.l"] [XUROKHD$`VKA 9 6 6 8 :;; : 9 7 4 2/+(%#%). 1.)'(*, 0 4 9> 2 5 7 :<?ACEFGGGFEC@> ; 7 4 0,)&# 4 5#$$%%&&'()*+-. 0 1 3 5 7 9;>ADGJNRV Z#_'d,j 5uH0o"]'d)f*h,j.l 0n 2q 5u=|K&b@VP5uXD 9y 5t 3r 1p/n-k+i)f'd%a#^VWXXYZ Z Z ZZYXWUSQOLJHFDCBBCDFHKNQTW ["^$`&c(e)g+h-k 2q ;{IX$_!\QC 8w 0o$`$`$`$`$`$`$`$`$`$`$`$_#_#_####N4s(e!\TNHB<| 6v 1o+i'c"]WRMHD@= ; 9 9 9 : ;=@DGKNRW,iV8xR,j"] ZXUROKHDA!]ULC ; 5 1/.,+*('&%$$$%&&&%&(/ 1 4 9=@ 0 2 4 7 9;>@ACDDEDCB@= ; 8 4 1.*'$! 7 7#$$%%&'()*+-. 0 1 3 5 7 9<>ADGJMQUY"^&c+h 1p>~XK8wH'd)g+i-k/m 1o 4s :yF#_Fuщh9xU@ 7w 4s 2q 0o.l,j*g(e&b$_TUVWXXYYYYXWVUSQOMJHFDCBBBCDGILORVY!\#_%a'd(f*g,j/n 7vDT%b(e!\M?~ 4s.l$`$`$`$`$`$`$`$_$_#_#_#_#_#_%%%%%S6u)f!]UNHA;{ 6u 0o+i&c"]WRLHC@= ; 9 9 9 : ;=@CGJNQU#^ 6uI>~)g!\YWTROKHEA>;TMG@ ; 6 3 0.,)(&%%$$$%&(* 6 9<?BD. 0 2 4 6 8 :<>@AABBA@>< : 7 5 2.+(%"  : 9#$%%&'()*+-. 
0 1 3 5 7 9<>ADGJMQUY"]%b)g/m 8wJ.mmfR&c(e*g,j-l/n 2q 7vAXBw5uO=} 6u 3r 1o/m-k+i)f'c$`"^STUVWWXXXWWVTSQOMKIGECBABBCEHJMQTW Z"]$`&b'd)f+h-k 3r>~O%a-k)gYH :z 1p$`$`$`$`$_$_$_#_#_#_#_#_#_#^#^''''''W8x*h"^UNHA ;{ 5u 0o+i&b!]WQLHC@= ; 9 9 9 9 ;=?BFILPSX&b/n.m&b [XVTQNKHEB>< 9 6JEA= 9 6 4 1 0.--,--./ 0>ADGII,. 0 2 4 6 8 9 ;<>????>= ; 9 7 4 2/,)&# ; ;$$%&'()*+-. 0 2 3 5 7 :<>ADGJMQUY"]%a)f-k 4s@XC&bD'd)f+h,j.m 0o 4s<|O8wt,kH 9y 4s 1p 0n.l,j*g'd%b#_!\STUUVVVVVVUTSQOMKIGECBAABCDFILORUY!\#^%a'c(e*g,j 0n 9xH"]/m2q(fSB 6u.m$_$_#_#_#_#_#_#_#_#_#^#^#^"^"^))))))\:z+i#^VNHA ;{ 5u 0n+h&b!\WQLGC@= ; 9 8 8 9 :<?AEHKNQTY#^$`"]YWUSQNKHEB?< 9 7 4QNKHECA?>= ; : 9 7CFILNON+-. 0 2 3 5 7 8 : ;<<<<< ; : 8 6 4 1/,)&$!=;$%&'()*+-. 0 2 4 6 8 :<?ADGJMQTX!\$a(e,j 1p :zK+i[Ū;zI&b(e)g+i-k/n 2q 8wF*hr_"]A 6v 2q 0o.m,k*h(e&c$`"]QRSTTUUUUUTSRQOM)*+,-.AAABCEGJMPTW Z"]$`&b'd)f*h-l 4sAV.l9x4s%bM=} 2q#_#_#_#_#_#_#_#_#^#^#^#^"^"^"]"]*++++++a=|-k$_VOHA ;z 5t 0n*h&b!\WQLHC@= ; 9 8 8 9 :<>@CFILOQTVXXVUSRPMKHEB?= : 7 5 3 1 [VSPNKHEB&&'OT!\)*+,-/ 0 1 3 4 6 7 8 9 9 : : 9 9 8 6 5 3 1.,)'$">$%&'()*+-/ 0 2 4 6 8 :=?BDGJMQTX!\$`'d+i 0n 6vBX;zp^R%a&c(e*h,j.l 0o 4t?~XSDO ;{ 4s 1p/n-k+i)f'd%a#^![QRRSTTTTTSSRP&'(*+,-./ 0 0ABDFIKORUX![#^%a&c(e)g,j 0o ;{N*g<|@2q [F 8w/m#_#_#_#_#^#^#^#^#^"^"^"^"]"]"]"],,------e?~.l$`WOHA ;z 5t /n*h&b!\WQLHD@= ; 9 8 8 8 9 ;=?BDGJLNPRSTSSRPNLJGEB@= : 8 6 4 2 0.-,+*))(((())*++,./ 0 1 2 3 5 5 6 7 7 7 7 7 6 5 3 2 0.+)'$" @>%&'()*,-/ 0 2 4 6 8 ;=?BEGJNQTX!\$`'d*h.m 4s<|L(eJ#^#_%b'd)f+h,k.m 2q 9xK7w,iD 7v 2q 0n.l,j*g(e&b#_!\YPQRRSSSSRRQP%'()*,-./ 0 1 1 2CEGJMPSWZ"]$_%b'd)f*h.l 6uF$_;{JB-lS@ 3r#^#^#^#^#^#^#^"^"^"^"]"]"]"]"]!]!\..///////iA/m%aWOHA ;z 5t /n*h&b!\WQLHD@= ; 9 8 8 8 9 :;>@BEGJLMOPQQPONMKIGDB@= ; 9 6 5 3 1 /.-,++******++,-./ 0 1 2 2 3 4 4 5 5 5 4 4 3 1 0.,*(&$" ?=%&()*,-/ 0 2 4 6 9 ;=@BEHKNQUX!\$`'d*h.l 2q 9xDV3rZī/mG$`&b(e)g+i-k 0n 4t@#_mUT<{ 3s 0o.m-k+h(f&c$`"] ZOPPQQRRRQQP$%&()*+-./ 0 1 2 2 3 3FHKNRUX ["^$`&c(e)g,j 1p>~V5uNS?&cJ :y 0n#^#^"^"^"^"^"^"]"]"]"]"]"]!\!\!\!\ 0 1 1 1 1 1 1 1 
1 1mB0n%aWOHA ;z 5t 0n+h&b!]WRMHD@= ; 9 8 7 7 8 9 :<>@BEGIJLMMNMMLKIGECA?= ; 9 7 5 3 2 0/.-,,+++++,,,-../ 0 0 1 2 2 2 2 2 2 1 0 /.-+)(&$" ?<&')*,-/ 1 2 4 7 9 ;=@CEHKNQUX!\$_'c*g-k 1p 6u>~L$`>~v@L#_%a&c(e*h,j.l 1p 9yM?.mD 6u 1p/n-k+i)f'd%a#^![XNOOPPPPPPO#%&')*+-./ 0 1 2 3 3 4 4 5JMPSVZ!\#_%b'd(f*h.m 8xL-kM`U7wYB 4t"^"^"^"^"^"]"]"]"]"]"]!]!\!\!\!\!\![ 3 3 3 3 3 3 3 3 3 3 3pC0o%bXOHA ;{ 5t 0n+h&c"]WRMIDA> ; 9 8 7 7 7 8 9 :<>@BDEGHIJJJJIHGEDB@>< ; 9 7 5 4 2 1 0/.--,,,,,,---..// / 0 0 0 0 //.-,+*(&%#!> ;')*,-/ 1 3 5 7 9;>@CFHKNRUX!\#_&c)g-k 0o 4t ;zDT-kSTR"]$_%b'd)f+h,k/m 4s@$`qQQ :z 2q 0n.l,j*g(e%b#_!\YMNNOOOOOO"#$%'(*+,./ 0 1 2 3 4 5 5 5 6 6NRUX ["^$`&c(e)g,j 3rC$_EhkO.lO;{ 0o"]"]"]"]"]"]"]"]"]!\!\!\!\!\!\![ [ [ 5 5 5 5 5 5 5 5 6 6 6 6rD0o%bXOHA ;{ 5u 0o+i&c"]XSNIEA>; 9 8 7 6 6 6 7 8 :;=?ABDEFFGGGFEDCB@?=; : 8 6 5 4 2 1 0/..---,,,,,----....---,+*)('%$" ?< :)*,-/ 1 3 5 7 9<>@CFILORUX!\#_&c)f,j/n 3r 8x?K"]<|hYB"^$`&b(e)g+i-k 0o 8xK;z'dA 4t 0o.l,j*h(e&c$`"] ZWLMMNNNNN!"$%&()+,-/ 0 1 2 3 4 5 6 6 7 7 7 7SVY!\#_%a'c(e*h/n ;{S:zflC#_E 5u"]"]"]"]"]"]"]!]!\!\!\!\!\!\![ [ [ [ Z 7 7 7 7 7 8 8 8 8 8 8 8 8sD0o%bWOHA;{ 6u 0o+i'd#^YSNJFB>< 9 7 6 5 5 5 6 7 8 9 :<=?@ABCCCCCBA@?>< ; : 8 7 5 4 3 2 1 0/.---,,,,,,,,,,,+++**)('&%$"!= : 8*+-/ 1 3 5 7 9<>ACFILORUX!\#_&c)f,j/m 2q 6v<|ES-kdx$`D!\#^%a&c(e*g,j.l 3r>~ Z[9xI 7w 1p.m,k+h)f'c$a"^ [XULLLMMMM!"#$&')*,-/ 0 1 3 4 5 6 6 7 7 8 8 8 8UX ["^$`&b'd)g,k 5tH-l\Ɗڋb5uS=| 0o"]!]!]!\!\!\!\!\!\!\!\![![ [ [ [ Z ZZ 9 9 9 9 : : : : : : : : ; ;sD0o%aWOHB<{ 6u 1p,j'd#_YTOJFB?< 9 8 6 5 4 4 4 5 6 7 8 9 :;<=>?????>>=< ; : 9 7 6 5 4 3 1 0 //.-,,,+++******)))(('&%$#"!  ; 9*+-/ 1 3 5 7 9<>ADFILORUX!\#_&b)f+i.m 2p 5t :y@K#_I(eF ["]#_%a'd)f*h,j/n 6uF-k|KS ;z 2q/m-k+i)f'd%a#^!\YVJKKLLLL !"$%'(*+-. 0 1 3 4 5 6 7 8 8 9 9 9 9 9 8Y!\#_%a'c(e+h 0o?"]Mڧ刺Q(eG 6u!\!\!\!\!\!\!\!\!\!\![ [ [ [ [ Z Z ZZY ;;<<<<<<=======qC/n%aWOHB<| 6v 1p,k(e$_ ZUPKGC@< : 8 6 5 4 3 3 3 4 4 5 6 7 8 9 : : ; ; ; ; ; ; : 9 9 8 7 6 5 4 3 1 0 //.-,++**)))((((''&&%%$#"!  
; 9 7+-/ 1 3 5 7 9<>ADFILORUX!\#_&b(f+i.l 1p 4s 8w=}FV8x~*hHY!["^$`&b'd)f+h-k 1p :zO:yU [>~ 3r/n-k+i)g'd%b#_!\YVTJJJKKK !"#%&()+-. 0 1 3 4 5 6 7 8 9 : : : : : : 9 ["]$`&b'd)g-k 8wP<{}չx>}V>~ 1o!\!\!\!\!\!\![![ [ [ [ [ [ Z Z ZZYYY=>>>>>??????????oA.m$`WOHB<| 7w 2q-k)f$` [VQLHD@= : 8 6 4 3 2 2 2 2 2 3 3 4 5 5 6 6 7 7 7 7 7 7 6 6 5 4 3 3 2 1 0/.-,++*))((''&&&%%$$##"!!  9 7 6-/ 1 3 5 7 9<>ADFILORUX!\#_&b(e+i.l 0o 3r 7v ;{BO,jds+iIWZ!\#^$`&c(e)g+i.l 3r>~V@~֮Q#_B 5t /n-k+i*g(e&b$_"] ZWTRIIIJJ !#$&')+,. 0 1 3 4 5 7 8 9 : : ; ;;; ; ; : :#_%a&c(e+i 2qD,jh̺_,jI 6v!\!\![![![![ [ [ [ [ [ Z Z Z ZZYYYYX@@@@@AAAAAABBBBBBl@-l$_VOHB=} 8w 3r.l)g%a!\WRNIEA> ; 8 6 4 3 2 1 1 0 0 1 1 1 2 2 2 3 3 3 3 3 3 3 2 2 1 1 0/.--,+*))(''&&%%$$##""!!  9 7 6 5/ 0 3 5 7 9<>ADFILORUX!\#_%b(e+h-k 0n 3r 6u 9y?J$`Kc)gIVX Z"]#_%a&c(e*g,j/m 5tBZ;{[_A"^C 6u 0n-k+i*g(e&b$`"] ZXURHHHII !"$%'(*,./ 1 3 4 6 7 8 9 : ;<<<==<<< ;$`&b'd)g.m;{YPEY?~ 1o [ [ [ [ [ [ [ [ Z Z Z ZZZYYYYXXXBBBBCCCCDDDDDDDDDDi>~,j#_VOIC=} 8x 3s/m*h&c"]XSOJFB?< 9 7 5 3 2 0 0////////// /////..--,++*))('&&%%$##""!!   8 6 5 4 0 2 5 7 9<>ADFILORUX![#^%b(e*h-k/n 2q 5t 8x=}FX9xl͜፽Q&cITWY ["]$_%a'c(e*h,j 0n 7vDW.l7v.mWC 6u 0o-k+i*g(e&c$`"^ [XURPGGGH "#%&(*+-/ 1 2 4 6 7 8 : ;<<=>>>>>==< ;&c(e+i 5tK:zm0oJ 6v [ [ [ [ [ [ Z Z Z Z ZZZYYYYXXXXWDDDEEEEFFFFFGGGGGGGe<{+i"^VOIC>~ 9y 4t 0n+i'd#_ZUPLHD@= : 7 5 3 1 0/..---,,,,,,,,+++**))(''&%%$##""!   8 6 5 5 4 2 4 7 9<>ACFILORUX [#^%a'd*g,j/m 1p 4s 7v ;{CQ,jLjd@"]GSUWY!\"^$`%b'd)f*h,k 0o 7wBPYXM@ 5u 0n-k+i*g(e&c$`"^ [XVSPFFFF !"$&')+-/ 0 2 4 6 7 9 : ;<=>>?????>>=<'d*g 0n@)fnL [?~ 0o Z Z Z Z Z Z Z ZZZYYYYYXXXXWWWFFGGGGHHHHIIIIIIIIIIIa:y*h"]UOID? :z 5u 1o,j(e$`![VRMIEA> ; 8 6 3 2 0/.-,++***))))(((''&&%%$##""!   6 6 5 5 5 4 6 9;>ACFILORUX ["^%a'd)g,j.m 1o 3r 6u :y@K#^7wIF1pVE<|SVX Z!\#^$`&b'd)f*h-k 0o 6v>~EGC ;{ 4s/n-k+i)g(e&c$`"^![YVSPNEEE!"#%')*,. 0 2 4 6 7 9 :<=>??@@@@@@??>=(e,j 8wUPx2qK 6u Z Z Z Z ZZZZYYYYYYXXXXWWWWVHHIIIJJJJKKKKKLLLLLLLL\8w)f!\UOJD@ ;{ 6v 2q-l)g%b"]XSOKGC?< 9 7 4 2 0/-,+*))((''&&&%%$$##""!   
7 6 5 5 5 5 6 9 ;>@CFILNQTW Z"]$`'c)f+i.l 0o 2q 5t 8x>}GU(f3r2q%bPCPRTVX Z!\#^$`&b'd)f*h-k 0o 4t 9x;{ :z 6v 2q.m,j+h)g(e&c$`"^![YVSQNLDD !#$&(*,. 0 2 4 5 7 9 :<=>?@AABBBAA@@?>= 2qG7vP [>~ 0nZZZYYYYYYYYXXXXXWWWWVVVJKKKLLLLMMMMNNNNNNNNNNNX6u(e![UOJE@<{ 7w 3r/m*h'c#_ZUQMHEA> ; 8 5 3 1/-,+*)('&&%$$##""!!   7 6 6 6 6 6 7 8 ;=@CEHKNQTW Z"]$`&c(f+h-k/n 1p 4s 7v;{BMX$a$`VJ@NPRTWY ["]#_$a&b'd)f*h,j/m 2p 4s 4s 2q 0n-l,i*h)f'd&b$`"^![YVSQNLCC!"$&')+-/ 1 3 5 7 9 ;<>?@ABBCCCCCBBA@?==|%ao3rJ 5tYYYYYYYYYYXXXXXWWWWVVVUULMMMNNNOOOOPPPPPPQQQQQQQP4t'd [TOJFA=| 8x 4s 0n,j(e$`!\WSNJGC?< 9 7 4 2 0.,+)('&%$$#""!   7 7 7 7 7 8 9 :=@BEHKNQTWY!\$_&b(e*h,j.m 1o 3r 6u 9y?~FNSSMD=}MOQSUWY ["]#_%a&b'd(f*g+i-k/m/n/m-l,j+h*g(f'd&b$`"^![YVSQNLIB "#%')+-/ 1 3 5 7 9 ;<>?ABCCDDEEDDCCBA@>PLQ Z=}YYYYYYYXXXXXXXWWWWVVVVUUUNOOOPPPQQQRRRRRSSSSSSSSSSS3r&c ZTOKFB>} 9y 5u 1p-k)g&b"^YUQLIEA> ; 8 6 3 1/-+*('&%$#"!   7 7 7 8 8 9 : ;=?BEGJMPSVY!\#_%a'd)g,i.l 0n 2q 4t 7w;{AFIIE@ ;{KMOQSUWY!["]#_$a&b'd(e)g*h+i,j,j,i+h*g)f(e'c%b$`"] [YVSQNLIAA !#$&(*,. 0 3 5 7 9 ;<>@ABCDEFFFFFEEDCBA?>1pH 4sXXXXXXXXXXXWWWWWVVVVUUUTTPPQQRRRSSSTTTTUUUUUUUUUUUUUU&b ZTPKGC?~ ;z 6v 2q.m+h'd$` [WSOKGD@= : 7 5 2 0.,+)(&%$#"!  8 8 8 9 9 : ;<=?ADGJMORUX ["^%a'c)f+i-k/m 1p 3r 5u 9x<|@BB?<| 9xJLNPRTVXY!["]#_$`%b'c(e(f)g*g*h*g)g)f(e'd&c%a#_"] [XVSQNLIGA!"$&(*,. 0 2 4 7 9 ;=>@BCDEFGGGHGGGFEDCA@>NW ;{XXXXXXXXXWWWWWWVVVVUUUUTTTRRSSTTTUUUVVVVWWWWWWXXXXWWWWW%aYTPLHD@<{ 8w 4s 0o,j)f%b"]YUQMIFB?< 9 7 4 2 0.,*('&$#"!  9 9 : : ;;<=>?ADFILORUW Z"]$`&b(e*h,j.l 0n 2q 4s 6v 9x ;{=|=| ;{ 9y 7v@JLNPRTVXZ!["]#_$`%a&c'd(e(e(f(f(e(e'd&c%b$`#^!\ ZXVSQNLIGE !#%')+- 0 2 4 6 8 :=>@BCEFGHHIIIIHHGFEDBA?}.lE 2qXXXWWWWWWWWWVVVVVUUUUTTTSSTTTUUVVWWWXXXXYYYYYZZZZZZZYYYYXYTPLHDA=| 9x 5t 1p.l*h'd$_![WSOLHEB?< 9 6 4 2 /-,*('%$#"!  
: ; ;<<=>?@ABCFHKNQTWY!\#_%a'd)f+i-k/m 0o 2q 4s 6u 8w 9x 9x 8w 6v!\@IKMNPRTVXZ ["]#^$`%a%b&c'c'd'd'd'c&c%b%a#_"^!\ZWUSPNKIGE!#$&(+-/ 1 4 6 8 :<>@BDEGHIIJJJJJJIHGFECB@>HS 9xWWWWWWWWWWVVVVVVUUUUTTTSSSRUVVWWWXXYYYZ Z Z [ [ [![!\!\!\!\!\!\!\!\!\![![ [ [ Z ZTPLIEA>} :z 6v 3r/n,j(f%a"]ZVRNKGDA>; 9 6 4 2 0.,*('&$#"! <<==>??@ABCDEHKMPSVX ["^$`&c(e*h,j-l/n 1o 2q 4s 5t 6u 6u 5u 4t [@GIKMOQRTVXY [!\"^#_$`%a%b&b&b&b&b%a%a$`#^"] [YWURPMKIGD "$&(*,. 1 3 6 8 :<>@BDFGHJJKLLLLLKJIHGFDBA?q(fA 0oWWWWWWVVVVVVVUUUUUTTTSSSSRRWWXXYYZ Z Z [![!\!\!\"]"]"]"]"^"^"^"^"^"^"^"^"^"^"]"]"]"]!\!\TPMIFB?~ ;{ 7w 4s 1o-k*g'd$`!\XTQMJGDA> ; 9 6 4 2 0.,+)(&%$#"! =>>?@@ABCDEFGHJMORUW Z"]#_%b'd)f+h,j.l/n 1o 2q 3r 3r 4s 3r [ Z@FHIKMOQRTVWY [!\"]#^#_$`$`$a%a$`$`#_#^"]!\ ZXVTROMKHFD !#%'),. 0 3 5 7 :<>@BDFHIJKLMMNNMMLKJIHGECA??~N 6uVVVVVVVVVVVUUUUUTTTTSSSSRRRQXYYZ Z [ [!\!\!]"]"]"^#^#_#_#_$_$`$`$`$`$`$`$`$`$`$`$`#_#_#_#^#^"^"]PMIFC?<| 9x 5u 2q/m+i(f%b#^ [WSPMJFCA>; 9 7 4 2 0/-+*('&%$#"! ?@@AABCDEFGGHIJKNQTVY!\#^$a&c(e*g+i-k.l/n 0o 1p 1p 2q 2pZY?DFHJKMOQRTVWY Z [!\"]#^#_#_#_#_#^"^"]!\ [YWUSQOMJHFD!#$')+- 0 2 5 7 :<>@CEFHJKLMNOOOOONNMLJIGFDB@a"^=}VVVVVVVVUUUUUUUTTTTSSSSRRRQQQZ Z [ [!\!\"]"]"^#^#_#_$`$`$a%a%a%b%b&b&b&b&b&b&b&b%b%b%b%a%a%a$a$`$`$_#_#^MJGC@=} :y 6v 3r 0o-k*h'd$`"]YVSOLIFDA>< 9 7 5 3 1 0.,+*('&%$#""!  AABBCCDEFGGHIJKKLLPSUX Z"]#_%a'd(f*g+i,k.l/m/n 0n 0oYYA?CEFHJKMOQRTUWXY Z![!\"]"]"]"]"]!\!\ [YXVTRPNLJGEC "$&(*-/ 2 4 7 9<>@CEGHJLMNOPPQQQPPONMKJHGECA4sH 2rUUUUUUUUUUUUTTTTTSSSSRRRRQQQP [![!\!]"]"^#^#_$_$`$a%a%b&b&c'd'd'd(e(e(e(e(e(e(e'd'd'd'd'c&c&c&c&b%b%a%a$a$`$`JGDA>} ;z 7w 4t 1p.m+i)f&b#_!\XUROLIFDA?< : 8 6 4 2 1/.,+*)('&%$##""!!!       
CCDDEFFGHHIJKKLMMNNBTWY!\"^$`&b'd)f*h+i,j-k.l.mXXX@?BCEFHJKMOPRSUVWXY Z [ [![![ [ [ ZYXWUSQOMKIGEC !#%(*,/ 1 4 6 9;>@BEGIKLNOPQRRRRRRQPONLKIGECA?U 9xUUUUUUUUUTTTTTTTSSSSRRRRQQQPPP!\"]"]"^#^#_$`$`%a%b&c'c'd(e)f)g*g*h+h+i+i+i+i+h*h*g)g)f)f(f(e(e(e'd'd'd'c&c&c&b%b%b%aDA>~;{ 8x 5u 2r 0n-k*h'd%a"^ [XUROLIGDB?= ; 9 7 5 4 2 1/.-+*)(('&&%$$$#####EEFFGGHHIJJKLLMMNNOOCDEF Z!]#_%a&c'd)f*g+i,jVWWXB@?@BCEGHJKMNPQSTUVWXYYYZYYXXVUTRPNLJHFDB!#%'),. 1 3 6 8 ;>@BEGIKMNPQRSSTTTSSRQPOMLJHFDB@ATTTTTTTTTTTTTTSSSSSRRRRQQQPPPPO"]"^#^#_$`$`%a&b&c'd(e)f*g+i,j-k.l/m/n 0n 0o 0o 0n/n.m.l-k,j+i+h*h*g)g)f)f(f(e(e(e(e(e(e(e(e)fA?~<| 9y 6v 4s 1p.l+i)f&c$`"] ZWTQOLIGEB@>< : 8 7 5 4 2 1 0.-,++*)(('''&&GGHHHIIJKKLLMMNNOOOPPPDEFGIJK%a&cPRSTUVWWA@>?@BCEFHJKMNOQRSTUVWWXXXWWVUTRQOMKIGEDB "$&)+. 0 3 5 8 ;=@BEGIKMOPRSTTUUUUUTSRQPNMKIGDB@ 4sTTTTTTTTTTSSSSSSRRRRRQQQPPPPOOO#^#_$_$`%a%b&c'd(e)f*h+i-k.m 0o 2q 3s 5t 6v 7w 8w 8w 7w 6v 5t 4s 2q 1o/n.l-k,j+i+i*h*h*h*h*h*h*h+i+i,j.l/n 2q 5t<| :y 7v 4t 2q/n-k*h(e%b#_!\YVTQOLJGECA?=; : 8 7 5 4 3 1 0/.--,++**JJJJKKKLLMMNNOOOPPPPQQQEFGHIJLMOPRSTUVVBA@>>?@BCEFHIKLNOPQRSTUUVVVUUTSRQONLJHGECA "$&(*- 0 2 5 8 :=?BDGIKMOQRTUVVWWWWVVUTRQOMKIGEC@TTTTTSSSSSSSSSRRRRRQQQQPPPPOOONN#_$`$`%a%b&c'd(e)g+h,j.m 0o 3r 6u 8x;{>~ACDDDCA>~;{ 9x 6u 3s 1p 0n.m-l-k,j,j,j,j-k-k.l/n 1p 3r 6v :z?EMW 8w 5t 3r 0o.l+i)f'd%a#^![YVSQOLJHFDB@>= ; 9 8 7 5 4 3 2 1 0 0/.MMMMMMMNNNOOPPPQQQQQQQQQQFGHIKLMOPQSTUUUBA?><>?@BCEFHIJLMNOPQRSSSTTSSRRQONLKIGFDB@!#%'*,/ 2 4 7 :<?BDGIKMOQSTUVWXXYXXXWVUSRPNLJHFCASSSSSSSSSSSRRRRRRQQQQPPPPOOONNNM$_$`%a%b&c'd(e)g+i-k/m 2q 5t 8x=|AFKOSVXXWUQLGB>} 9y 6u 3r 1p 0o/m.m.m/m/n 0o 1p 3r 6u 9x=}BIR"],j;{Pl͒ 1p/m,j*h(e&b$`"] [XVSQOLJHFDCA?>< ; : 8 7 6 5 4QPPPOOPPPPPQQQQRRRRRRRRRRRQQHIJKLNOPQRSTUCB@? : ;<>?@BCDFGHJKLMNOPPQQRRQQPPONLKIHFDCA?!#%'),. 
1 4 7 9<?ADGIKNPQSUVWXYZ Z Z ZZYXWVTSQOMKHFDASSSSSSRRRRRRRRQQQQQPPPPOOONNNNMM$`%a%b&c'd(e)f*h,j/m 1p 5t 9y>~DKR Z&b,j1p6u8w8w5u1p+i%aXPIB=| 8x 5u 3r 2q 1p 1p 2p 3r 4s 6v 9y=}BIQ [*h8xKfʊڹ+i)f'd%a#_!\ ZXUSQOMKIGECB@?=< [XVTSSRRRRRRRSSSSSSSTTTTSSSSRRQQIJLMNOPQRSEDCA@> 9 : ;<=?@ACDEGHIJKLMNNOOOPOOONMLKIHGECB@> "$&)+. 1 3 6 9<>ADFIKNPRTUWXY Z [![!\!\![ [ ZYXWUTRPNKIFDB?RRRRRRRRRRQQQQQPPPPPOOONNNNMMML$`%a&b&c'd(e*g+i-l 0o 3r 8w=}CKU$_-k9xER^glkeZM?2q'dXNF? ;z 7w 5u 4t 4s 5t 6v 8x<{@EKT#^-k:zMfˇٱUSQO/n ZK? 6u.m)f%a"^ [YWVVUUUUUUUUUUUUUUUUUTTTTSSRRQPOLMNOPQRFEDBA?> 7 8 : ;<=>@ABCEFGHIJKLLMMMMMMMLKJIHGEDB@?= !$&(+- 0 3 6 8 ;>ADFIKNPRTVWY Z [!\!]"]"]"]"]!\![ ZYXVTRPNLIGDB?RRRRRQQQQQQQQPPPPOOOONNNNMMMLLL$a%a&b'c'd)f*g,j.l 1p 4t 9y?GQ!\,j;zNe֚匽pV@/n#_SIB=} :y 8x 8w 8x :y<|@DJQ Z(e2q@Rīؤ倵`G3r$aRF<| 4s.m*g&c$_"] [ZYXXWWWWWWWWWVVVVVVVUUUTTSRRQPONMLKJHGFDCA@?= 6 7 8 9 ;<=>?@BCDEFGHIIJKKKKKKKJIHGFEDBA?><!#%(*- / 2 5 8 ;>@CFHKMPRTVXY [!\"]"^#^#_#_#_#^"^"]!\ ZYWUSQOLJGEB?QQQQQQQQQPPPPPPOOOONNNNMMMLLLLK$a%a&b'c'd(f*g,i.l 1o 4t 9y@HR$`1pD]֩kL6v'dVLD?<| ;{ ;{<|>~BFKRY&b.l8wDQ`o~Ջ۔ޙވyhWG8w+i!\PF>} 7v 1p-k)f&c$`#^"]!\ [ ZZYYYYYXXXXXXWWWWVVUUTTSRRQPONMLJIHGEDBA?>< 5 6 7 8 9 :;=>?@ABCDEFGGHHIIIIIHHGGFECBA?>= ;!#%'*,/ 2 5 7 :=@CFHKMPRTVXZ [!]"^#_$_$`$`$`$`$_#_"^"]![ZXVTROMJHEB@QQQQPPPPPPPPOOOOONNNNMMMLLLLKKK$`%a&b&c'd(e)g+i-k 0n 3r 8w>~FP"]0nC`ȈھzT;z*gYNFB?~>}>}?ADHMRX#^(e-l3r8x=|@BB@=|8w2p+i%aXPIB<{ 6v 2q.l+i(f&c%a$`#^"]"]!\!\![ [ [ [ Z Z ZZYYYXXXWWVVUUTSRQQPOMLKJIGFDCB@?= 3 4 5 6 7 8 9 : ;<=>?@ABCDEEFFGGGGGFFEEDCB@?>=; : "$'),. 1 4 7 :=@BEHKMPRTVX Z!\"]#^$_$`%a%b%b%b%a%a$`#_"^!\ [YWTRPMJHEB@PPPPPPPPOOOOOONNNNNMMMLLLLKKKJJ$`%a%b&c'd(e)f*h,j.m 1p 5u :zAKW)f:zTyԯ쁵X=|+i ZOHC@>~>~?ACEHKNQTVXXXXVSPLHD? ;{ 7w 4s 1o.l+i)g(e&c%b%a$`#_#^"^"]"]"]!\!\!\![ [ [ Z ZZYYXXWWVUUTSRQPONMLJIHGEDBA?>< 2 3 4 5 5 6 7 8 9 :;<=>?@ABBCDDDEEEEDDCCBA@?>=; : 9 "$&)+. 
1 4 6 9<?BEHJMORTVX Z!\"]#_$`%a&b&c'c'd'c&c&b%a$`#_"]![YWUSPMKHEC@PPPOOOOOOOONNNNNMMMMLLLLKKKJJJI$`$a%a&b&c'd(e)g+h,k/m 2q 6v<{CMY,j?~[Ņ~V<{+h ZPHC@>}=|=|=}>}?~@AABBBBA?>}<{ :y 7w 5t 3r 1p/m-k+i*g)f(e'c&b%b%a$`$`#_#_#^#^"^"]"]!\!\!\ [ [ ZZYYXWWVUTTSRQPOMLKJHGFDCA@?= 0 1 2 2 3 4 5 6 7 8 9 : ;<<=>?@@AABBBCCBBBAA@?>=< ; : 9 8!#&(+- 0 3 6 9<?BDGJMORTVX Z!\"^$_%a&b&c'd(e(e(f(e(e'd&b%a$`"^!\ ZXUSPNKHFC@OOOOOOOONNNNNMMMMMLLLLKKKJJJJII#_$`%a%a&b&c'd(e)g+h,k/m 2q 6u;{BLX*g;{U|ԲrO8x)fYOGB>~<{ :z 9y 8x 8x 8w 8w 7w 7v 7v 6u 5u 4t 3r 2q 1p 0n/m-l,j+i*h)g(f(e'd&c&c&b%a%a$`$`$`#_#_#^"^"]"]!\!\ [ [ ZYYXWWVUTSRQPONMKJIHFECBA?></ 0 0 1 2 3 4 5 6 6 7 8 9 : ;<<=>>??@@@@@@@@??>=<< ; : 9 8 7!#%(*- 0 3 5 8;>ADGJLOQTVX Z!\"^$`%a&c'd(e)g*h*h*h*h)f(e'd&b$`#_"] [XVSQNKHFC@OOONNNNNNNMMMMMLLLLKKKKJJJJIIIH#_#_$`$a%a&b&c'd(e)f*h,j.m 1p 5t 9y?HR#_1pEbɌ솹_D2q%aULE@<| 9y 7w 6u 4t 4s 3r 2q 1p 1p 0o/n/m.l-k,k,j+i*h*g)f)f(e'd'd'c&c&b%b%a%a$`$`#_#_#^"^"]"]!\![ [ ZYYXWVUUTSRQONMLKIHGEDBA@>=-.// 0 1 2 2 3 4 5 6 7 7 8 9 : : ;<<==>>>>>>>>==<; ; : 9 8 7 6 5 #%'*,/ 2 5 8 ;>ADFILNQSVX Z!\"^$`%a&c(e)f*h,j-k.l-l,k+i)g(e&c%a#_"] [YVTQNKIFC@NNNNNNNMMMMMMLLLLKKKKJJJJIIIHHH"^#^#_$`$`%a%b&c'd(e)f*g+i-k/n 2q 6u ;{AJT%a2qD^Ɓ׮酸cI7v)g ZQIC>~ :z 7w 5t 3r 2q 1p 0o/n.m.l-k,j,j+i+h*h*g)g)f(e(e'd'd'c&c&b%b%a%a$`$`#_#^"^"]"]!\![ [ ZYXXWVUTSRQPOMLKJHGFDCA@?=<,-../ 0 0 1 2 3 3 4 5 6 6 7 8 9 9 : : ; ;<<<<<<<<; ; : : 9 8 7 7 6 5 4 "$'),/ 2 5 7 :=@CFIKNQSUX Z!\"^$`%a&c(e)g+i-k/n 1p 2q 2q 0o.l+i)f'd&b$`"^!\YWTQNLIFC@NMMMMMMMMLLLLLLKKKKJJJJIIIHHHGG"]"^#^#_$_$`%a%a&b&c'd(e)f*h+i-k /n 2q 6u ;zAHQ!\+i8xJa}՞抻mUB4s(f [QJD? ;{ 8w 5u 3r 2p 0o/n.m-l-k,j+i+i*h*g)g)f(f(e(e'd'd&c&b&b%a%a$`$`#_#^"^"]!\!\ [ ZYYXWVUTSRQPONLKJIGFDCB@?=<++,--.// 0 1 1 2 3 3 4 5 5 6 7 7 8 8 9 9 : : : : : : : : 9 9 8 8 7 7 6 5 4 4 3 "$&),. 
1 4 7 :=@CEHKNPSUWZ!\"^$`%a&c(e)g+i.l 1p 5t 8w 9y 8x 5u 1p.l*h(e&c$a#^!\ZWTQOLIFC@MMMMMMLLLLLLKKKKJJJJIIIIHHHGGGG!\!]"]"^#^#_$`$`%a%b&b&c'd(e)f*g+i-k/m 1p 4t 8x=}CIR [(f2q>~L\l|Ԋڔޛ݅vgXI=}2q)f"]TMGB>} :z 7w 5t 3r 1p 0n.m-l-k,j+i+h*h)g)f)f(e(e'd'd&c&b%b%a$a$`$_#_#^"]"]!\ [ ZZYXWVUTSRQPONLKJIGFECB@?>< ;*++,,--.// 0 1 1 2 3 3 4 4 5 6 6 7 7 7 8 8 8 8 8 8 8 8 7 7 7 6 5 5 4 4 3 2 2!$&)+. 1 4 7 :<?BEHJMPRUWY!["]#_%a&c(e)g+i.l 2q 7v<|ADB=} 7v 1o,j)f'c%a#_!\ZWTROLIFC@=LLLLLLLKKKKKKJJJJIIIIHHHGGGGFF [![!\!]"]"^#^#_$`$`%a%a&b&c'd(e(f)g*h,j-l/n 2q 4t 8w<{@FKRY$`*g0n6u;{@CDDB?;z6u0o*h%a [UOIE@=| :y 7v 4t 2r 1p/n.m-k,j+i+h*h)g)f(f(e'd'd&c&c%b%a%a$`$_#_#^"]!]!\ [ ZYYXWVUTSRQPONLKJIGFECBA?>< ;))**++,,-..// 0 0 1 2 2 3 3 4 4 5 5 5 6 6 6 6 6 6 6 6 6 5 5 4 4 3 3 2 2 1 1!#&(+. 0 3 6 9<?BEGJMORTVY ["]#_%a&c'd)f+i-l 1p 7v?~HPTQH>} 4t.l*g'd%b#_"] ZWTROLIFC@=LLLLKKKKKKJJJJIIIIIHHHGGGGFFFEZ Z [![!\!\"]"^#^#_#_$`$`%a%b&b&c'd(e(f)g*h+i-k.m 0o 2q 5t 8w ;z>~AEILORUWXXXWUSPMJFC@=} :z 8x 6u 4s 2q 0o/n.l-k,j+i*h*g)f(f(e'd'd&c&b%b%a$`$`#_#^"^"]!\!\ [ ZYXXWVUTSRQPNMLKJHGFECBA?>< ;'(())**++,,--.// 0 0 1 1 2 2 2 3 3 4 4 4 4 4 4 4 4 4 4 3 3 3 2 2 1 1 0 0 /!#%(+- 0 3 6 9<>ADGILOQSVX Z!\#^$`&b'd)f*h,k 0n 5t=}IX(e-k)fXG 9y 0o+i(e%b$_"] ZWUROKHEB?=KKKKKKJJJJJJIIIIHHHHGGGGFFFEE$XYY Z Z [!\!\"]"]"^#^#_$_$`$`%a%b&b&c'd'd(e)f)g*h+i-k.l/n 1o 2q 4s 5u 7w 9x :z<{=}>}>~?~?~>~>}=|<{ :z 9y 8w 6u 5t 3r 2q 0o/n.l-k,j+i*h*g)f(f(e'd'd&c&b%b%a$a$`#_#_"^"]!]!\ [ ZZYXWVUTSRQPONMLKIHGFDCB@?>< ;&''((())**++,,--..// 0 0 0 1 1 1 2 2 2 2 2 2 2 2 2 2 2 1 1 1 0 0 ///.!#%(*- 0 3 5 8 ;>ADFIKNPSUWZ!\"^$`%b'd(e*g+i.l 2q 9yEV.mBLC.mS? 3r,j(e&b$`"] ZWTQNKHEB?<KKJJJJJJJIIIIIHHHHGGGFFFFEE$$$WWXXYY Z [ [!\!\!]"]"^#^#_#_$`$`$a%a%b&b&c'c'd(e(e)f)g*h+i,j,j-k.l/m/n 0n 0o 1p 1p 1p 1p 1p 1p 0o 0o /n/m.m.l-k,j+i+i*h*g)f(f(e'd'd&c&c&b%a%a$`$`#_#^"^"]!\!\ [ ZZYXWVUUTSRQPONLKJIHGEDCB@?>< ;%&&&''((())**+++,,--...// 0 0 0 0 0 1 1 1 1 1 0 0 0 0 ////... 
#%'*-/ 2 5 8 ;>@CFHKMPRTWY ["]#_%a&c(e)f+h-k /n 4t>}M)gIoσqH&bF 5u-k)f&b$`"] ZWTQNKHEB?<JJJJJIIIIIIHHHHGGGGFFFFEEE$$$$UVVWWXXYY Z Z [![!\!\"]"]"^#^#_#_$_$`$`%a%a%b&b&c&c'c'd'd(e(e)f)f)g)g*g*h*h*h*h*h*h*h*g*g)g)g)f)f(e(e'd'd'c&c&c&b%b%a$a$`$_#_#^"^"]"]!\![ [ ZYYXWVUUTSRQPONMLKJHGFEDBA@?=< ;$%%%&&&''((()))**++,,,---..../////////.....---- "%'*,/ 2 5 7 :=@CEHJMOQTVX Z!\#^$`&b'd(f*g+i.l 1p 7vBW9yo϶n4sM 8x.l)f&c$`"] ZWTQNKHEB?<JIIIIIIIHHHHHGGGGFFFFEEED$$$$$TTUUVVWWXXYYY Z Z [![!\!\!]"]"]"^#^#_#_#_$`$`$`$a%a%a%a%b&b&b&b&c&c&c&c&c&c&c&c&c&c&c&c&b&b&b%b%a%a%a$`$`$`#_#_#^"^"]"]!\!\![ [ ZYYXWWVUTTSRQPONMLKJIHGEDCBA?>=< :$$$$%%%&&&'''((())***+++,,,,--------------,,,,,, "$'),/ 2 4 7 :=?BEGJLNQSUWY!["]#_%a&c(e)g+h,j.m 2q 8xF$_MET :z.m)f&c$`"] ZWTQNKGDA>;IIIIHHHHHHGGGGFFFFEEEEDD##$$$$RRSSTTUUVVWWXXXYYZ Z Z [ [!\!\!\!]"]"]"^"^#^#^#_#_#_#_$_$_$`$`$`$`$`$`$`$`$`$`$`$`$_#_#_#_#^#^"^"^"]"]!\!\!\ [ [ ZZYYXWWVUUTSRQQPONMLKJIHGFECBA@?>< ; :###$$$$%%%&&&&'''((()))***+++++,,,,,,,,,,,+++++++ "$'),. 1 4 7 9<?ADGIKNPRTVX Z!\#^$`&b'd(f*g+i-k/m 2q :yI)gbX [<|/n)g&c$`"] ZWTQNJGDA> ;HHHHHHGGGGGGFFFFEEEEDDD###$$$$PQQRRRSSTTUUUVVWWWXXYYYZ Z Z [ [ [![!\!\!\!\!\!]"]"]"]"]"]"]"]"]"]"]"]"]"]"]!]!\!\!\!\![ [ [ Z ZZYYXXWWVVUTTSRRQPONMMLKJIHGFEDCA@?>=< ; 9""####$$$$%%%%&&&''''((()))))*******++++********** "$&),. 1 4 6 9<>ACFHKMOQSUWY ["]#_%a&c(e)f*h,j-l/n 3r :zJ.luj$`>}/n)g&c$`"]ZWTPMJGDA> ;HHHGGGGGGFFFFEEEEEDDD#####$$$$NOOPPPQQRRSSSTTUUUVVVWWWXXXXYYYYYZZ Z Z Z Z Z Z Z Z Z Z Z Z Z ZZZYYYYXXXWWWVVUUTTSSRQQPONNMLKJIHHGFEDCB@?>=< ; :""""""#####$$$$%%%%&&&&''''((((())))))))))))))))))) "$&)+. 1 3 6 9;>@CEHJLNPRTVX Z!\"^$`%b'c(e)g+i,j.l 0n 3r :zK0ow&b>~/n)f&b$_!\YVSPMJFC@= :GGGGGFFFFFEEEEEDDDDC######$$$$MMMNNNOOPPPQQRRRSSSTTTUUUUVVVVVWWWWWWWWXXXXXWWWWWWWVVVVUUUTTTSSRRQQPOONMMLKJJIHGFEDCBA@?>=< ; : 9!!!"""""""#####$$$$%%%%%&&&&''''''(((((((((((((((()) "$&)+. 
0 3 6 8 ;>@BEGIKMOQSUWY [!\#^$`&b'd(f*g+i-k.l 0n 3r :yJ1o|&b>}/m)f&b#_!\YVSPLIFC@= :GFFFFFFEEEEEDDDDDCC#######$$$$KKKLLLMMNNNOOOPPPQQQQRRRSSSSSTTTTTTTUUUUUUUUUTTTTTTSSSSRRRQQPPOONNMMLKKJIIHGFEEDCBA@?>=<; : 9!!!!!!!!""""""######$$$$$%%%%%&&&&&&''''''''''''(((((( "$&)+. 0 3 6 8 ;=?BDFHJLNPRTUWY ["]#_$a&b'd)f*h+i-k.m 0o 3r 9yI/my$`<|.m(f%b#_!\YUROLIEB?< 9FFFFEEEEEDDDDDCCCC"""#####$$$$IIIJJJKKKLLLMMMNNNNOOOPPPPPQQQQQQQRRRRRRRRRRQQQQQQQPPPOOONNNMMLLKKJIIHGGFEEDCBAA@?>=< ; : 9   !!!!!!!!!!""""""######$$$$$$%%%%%%&&&&&&&&&&'''''''( "$&)+. 0 3 5 8 :=?ACEGIKMOQRTVXY!["]#_%a&c'd)f*h,j-k.m 0o 2q 8xF+im![ :z-l(e%a#^ [XURNKHEB?< 9EEEEEEDDDDDCCCCCB""""#####$$$%GGGHHHIIIJJJJKKKLLLLMMMMMNNNNNNNOOOOOOOOOOOONNNNNNMMMMLLLKKJJIIHHGGFEEDCCBAA@?>=<< ; : 9        !!!!!!!!!!"""""""######$$$$$$$%%%%%%%%&&&&&&&'''' "$&(+- 0 2 5 7 :<>ACEGHJLNOQSTVXZ!["]#_%a&c(e)f+h,j-k/m 0o 2q 7wD'dq\U 8w,j'd%a"^ [XTQNKGDA> ;EEEDDDDDDCCCCCBBB"""""#####$$$%EEEFFFFGGGHHHHIIIIJJJJJKKKKKKKLLLLLLLLLLLLLKKKKKKJJJJIIIHHGGGFFEEDCCBBA@??>==< ; : 9 8             !!!!!!!!!!"""""""#######$$$$$$$$%%%%%%%&&&&''' "$&(+- 0 2 5 7 :<>@BDFGIKLNPQSTVXZ!["]#_%a&c(e)g+h,j-l/m 0o 2q 6vB#^`IN 5t+i'd$`"] ZWTPMJGD@= ;DDDDDDCCCCCBBBBB""""""####$$$%%CCCDDDDEEEEFFFFFGGGGGHHHHHHHIIIIIIIIIIIIIIIHHHHHGGGGFFFEEEDDCCBBAA@@?>>=<< ; : 9 9                   !!!!!!!!!!""""""""#######$$$$$$$%%%%%&&&&' "$&(+- 0 2 5 7 9;=?ACEFHJKMNPQSTVXZ!["]#_%a&c(e)g+h,j-l/m 0o 2q 5u?XP8wG 2r*h&c$`!]YVSPLIFC@= :DDCCCCCCBBBBBAA"""""""####$$$%%AAAABBBBCCCCCDDDDDEEEEEEEFFFFFFFFFFFFFFFFFEEEEEEDDDDCCCBBAAA@@??>==<< ; : : 9 8                         !!!!!!!!!!""""""""#######$$$$$%%%%%&&&' "$&(+- 0 2 4 7 9 ;=?ABDEGHJKMNPQSTVXY ["]#_%a&c(e)g+i,j-l/m 0o 1p 4t=}RAw)gA 0o)f&b#_!\YVROLIEB?< 9CCCCBBBBBBAAAA!!""""""####$$$%%????@@@@@AAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCCBBBBBAAAA@@@???>>==<<; ; : : 9 9                               !!!!!!!!!!""""""""######$$$$$%%%&&&' "$&)+- 0 2 4 6 8 :<>@ACDFGIJKMNOQRTVWY ["]#_%a&c(e*g+i,j-l/m 0n 1p 4s ;zM5tQX ;z.l(e%a#^ [XUQNKHEA>; 
9BBBBBBAAAAAA@!!!!"""""###$$$%%%====>>>>>>???????@@@@@@@@@@@@@@@@@@@@@@@@@@????>>>>===<<< ; ; : : 9 9 8                                     !!!!!!!!!!"""""""######$$$$%%%&&&' "$&)+- / 2 4 6 8 :<>?ABCEFGHJKLNOPRTUWY ["]#_%a'd(f*g+i,j-l/m 0n 1p 3r 9xH*h^ƣw6vL 6u,j'd$`"] ZWTQMJGDA> ; 8BBBAAAAAA@@@!!!!!"""""###$$$%%& ; ; ;;<<<<<<<=========>>>>>>>>>>>>>>>>=======<<<<<; ; ; : : : 9 9 9 8!!!!!!                                    !!!!!!!!!!"""""""#####$$$$%%%&&&' "$&)+- / 2 4 6 8 :;=>@ABDEFGHIKLMOPRSUWY ["]$`&b'd)f*h+i,k-l.m/n 0o 2q 7wD"]EnsL$aB 2p*h&c$_!\YVSPLIFC@= : 7AAAAA@@@@@@!!!!!!!""""##$$$%%&& 4 4 3 9 : : : : : : : : ; ; ; ; ; ; ; ; ; ; ; ;;;;;; ; ; ; ; ; ; ; ; ; : : : : : : 9 9 9 9 8"""!!!!!!!!!!!!!!                                !!!!!!!!!!!""""""#####$$$$%%%&&&'' "$')+-/ 2 4 6 7 9 ;<>?@ABCDFGHIJKMNOQSUWY!\#_%a'd)f*g+i,j,k-l.m/n 0o 2q 6u@T2qJI1oP :z.m(f%b#^![XUROLHEB?< 9 7A@@@@@@??!!!!!!!!!"""###$$$%%&& 4 4 4 3 3 3 2 2 2 1 1 1 1 0 0 0///... 9 9 9 9 9 9 9 9 9++++*#######""""""""""!!!!!!!!!!!!!!!                           !!!!!!!!!!!!""""""#####$$$$%%%&&''(!#%')+-/ 1 3 5 7 9 :<=>?@ABCDEFGHIKLMOQRUW Z"^%a'd)g*h+i+i,j,j-k.l/m 0n 1p 4t<|L%b2q0n ZD 4s,j'd$a"] ZWTQNKGDA> ; 8 6@@@@????!!!!!!!!!!"""###$$%%&&& 5 4 4 4 3 3 3 2 2 2 1 1 1 1 0 0 0 ////...----,,,,$$##########"""""""""""!!!!!!!!!!!!!!!!!!                
!!!!!!!!!!!!!!"""""""#####$$$$%%%&&&''((!#%')+-/ 1 3 5 7 8 : ;<=>?@ABCCDEFHIJLMOPSVY"^%b)f+i,j,k,j,j,j,j-k.l.m/n 1o 3r :yFU#^ ZK ;{ 0o*g&c$_!\YVSPMJFC@= : 8 5??????>     !!!!!""""##$$%%%&&' 5 5 4 4 4 3 3 3 2 2 2 2 1 1 1 0 0 0 0///....---$$$$$$###########""""""""""""!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""#####$$$$%%%&&&''(()!#%')+-/ 1 3 5 6 8 9 :;<=>?@@ABCDEFGHJKMOQTY#^'d+i/m 0o 0o/m-k,j,j,j,j-k.l/m 0o 2q 7w@JPK@ 4t-k(e%a#^![XUROLIEB?< : 7 4??>>>>       !!!!"""###$$%%&&'' 5 5 5 4 4 4 3 3 3 3 2 2 2 1 1 1 0 0 0 0////.%%$$$$$$$$$$###########""""""""""""""!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""#####$$$$%%%%&&&''(()) "#%')+-/ 1 3 4 6 7 9 : ;;<=>>?@@ABCDEGHIKMOSX$_*g 0o 5t 7w 6v 4s 1o.l,j+i+i,j-k.l.m/n 1p 5t;{BE@ 8w 0n*h'c$`"] ZWTQNKGDA>; 9 6 3>>>>>        !!!!"""##$$%%&&&'' 6 5 5 5 4 4 4 4 3 3 3 2 2 2 1 1 1 1 0 0 0 /%%%%%%%$$$$$$$$$$$############""""""""""""""""""""!!!!!!!!!!"""""""""""""""#######$$$$$%%%&&&'''(())** "$&(*,-/ 1 3 4 6 7 8 9 : : ;<<=>>?@ABCDEFHIKNRY%b.l 7v?~CB=} 7v 1p.l+i+h+h+i,j-k.l/m 0o 3r 7w<{<| 8x 2q,j(e%b#_!\YVSPMJFC@= ; 8 5 3====         !!!!""###$$%%&&''( 6 6 5 5 5 4 4 4 4 3 3 3 2 2 2 2 1 1&&&%%%%%%%%%%$$$$$$$$$$$$##############"""""""""""""""""""""""""""""""""#########$$$$$%%%%&&&&''((())**+ "$&(*,./ 1 2 4 5 6 7 8 9 : : ; ;<<==>?@ABCEFHJMQY'd 3rAMTSLB 8x 1p-k*h*g*h+i,j-k-l.l/n 1p 4s 7v 6v 3r.l*g'c$`"] [XUROKHEB?< : 7 4====          !!!"""##$$%%&&''(( 6 6 6 5 5 5 4 4 4 4 3 3 3 2&&&&&&&&&%%%%%%%%%%%%$$$$$$$$$$$$######################"""""""################$$$$$$$%%%%%&&&'''((()))**++!#$&(*,./ 1 2 4 5 6 7 7 8 9 9 9 : : ;;<=>?@ABCEFHKQ Z*h :yN%a/m0n'dTD 8w /n+i)g)f)g*h+i,j-k-l.m 0n 2p 3r 2q/m+i(e%b#_!\YVSPMJGDA>; 9 6 3<<<          !!!""###$$%%&&'(() 7 6 6 6 5 5 5 5 4 4'''''&&&&&&&&&&&&%%%%%%%%%%%%$$$$$$$$$$$$$$$#############################$$$$$$$$$$%%%%%&&&&&'''((()))**++,, !#%'(*,./ 1 2 3 4 5 6 7 7 8 8 8 9 9 9 : ;;<=>@ABCEGJP [-kA"];zQVH0oVB 5t-k)g(e(e)f*g+h+i,j-k-l.m/n 0n.m,j)f&c$`"^ [XUROLIFC@= : 8 5 3<<     !!!"""##$$%%&&''(() 7 7 6 6 
6 5(''''''''''''&&&&&&&&&&&&%%%%%%%%%%%%%%$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%&&&&&''''(((()))**++,,-- "#%')*,./ 1 2 3 4 5 5 6 6 7 7 7 7 8 8 9 9 : ;<=>@ABDFJP![/nH/mZĈښX2qR=| 0o*g'd'c'd(e)g*h+i+i,j,j-k-l-k,j*g'd%a#_!\YWTQNKHEB?< 9 7 4 2 ;    !!!""##$$%%&&''(()) 7 7(((((((((''''''''''''&&&&&&&&&&&&&&%%%%%%%%%%%%%%%%%%%%$$$$$$$$$$$$$%%%%%%%%%%%%%%%&&&&&&'''''(((()))***++,,,--. "$&')+,./ 0 2 3 3 4 5 5 5 6 6 6 6 7 7 7 8 9 : ;<>?@BCEIP!\ 1pO=}V*gH 5u,i'd&b&b&c(e)f*g*h+i+i+i,j,j+i*g(e&b$`"] [XUROLIFCA> ; 8 6 3 1    !!"""##$$%%&''(())*)))((((((((((((''''''''''''''&&&&&&&&&&&&&&&&&%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&''''''(((())))***+++,,---../!#$&()+,./ 0 1 2 3 3 4 4 4 5 5 5 5 5 6 6 7 8 9 :;=>@ACEHO!\ 2qSICW<|.m'd%a$`%a&b'd(e)f*g*h*h*h*h*h)g(e&c$a#^!\YWTQNKHEB?< : 7 5 2 0   !!!""##$$%%&&''())**)))))))((((((((((((((''''''''''''''''&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&'''''''''(((((())))****+++,,,--...// 0 !#%&()+,./ 0 1 2 2 3 3 3 3 4 4 4 4 4 5 5 6 7 8 9 ;<>?ACEHO [ 1pSNe*hE 1p(e$`#_#_$`%b&c(e(f)f)g*g*g)g)f(e&c%a#_"] ZXUROLJGDA>; 9 6 4 1/   !!!""##$$%&&''(()**+)))))))))))))(((((((((((((((('''''''''''''''''''''''''''''''''''''''''''(((((((())))))****++++,,,---..// 0 0 0 1 "#%'(*+-./ 0 1 1 2 2 2 2 2 3 3 3 3 3 4 4 5 6 7 9 :<>?ACEHNY /nPK9yM 5t)f#_"]"]"^$_%a&c'd(e(e)f)f)f(e'd&c%a$`"] [YVSQNKHEB@= : 8 5 3 0   !!""##$$%%&''(())*++*****))))))))))))))))(((((((((((((((((((((((((((((((((((((((((((((((((()))))))******++++,,,,---../// 0 0 1 1 2 2!"$&')*+-./ 0 0 1 1 1 2 2 2 1 1 2 2 2 2 3 4 5 7 8 :<>@BCEHNX-kJ@GT 8x*g#_ [ [!\"]#_$`%b&c'd(e(e(e(e'd&c%b$`"^!\ZWUROLIGDA>< 9 6 4 2 /   !!!""#$$%%&&'(())*++,*************))))))))))))))))))))))))((((((((((((((((()))))))))))))))*******+++++,,,,,---.../// 0 0 0 1 1 2 2 3 3 !#$&')*+-.// 0 0 1 1 1 1 1 0 0 0 1 1 2 2 3 5 6 8 :<>@BDFIMV)fB1pNY :z*g"^YXY [!\#^$`%a&b&c'd'd'd&c&b%a$`#^!\ ZXVSPNKHEB@= : 8 5 3 1/   !!""##$$%&&''())**+,,+++++**********************)))))))))))))))))))))))))))*************+++++++,,,,,----..../// 0 0 0 1 1 2 2 2 3 3 
4 4!"#%&()*,-../ / 0 0 0 0 0 0 / / / 0 0 1 2 3 4 6 8 :<?ACEGJNU&b 9y"^\JX :y)g!\XVWX Z!\"]#_$`%a&b&c&c&b%b%a$`#^"] [YVTQOLIFDA>< 9 7 4 2 0.  !!""##$$%%&''())**++,-++++++++++++++++++***********************************+++++++++++,,,,,,,-----...../// 0 0 0 1 1 1 2 2 2 3 3 4 4 4 5 !#$%'()+,--./////////..// 0 1 2 4 6 8 :=?BDGIKNT#^ 1pL8x}=}R 7v(e ZVTTVXY ["]#^$`$`%a%a%a%a$`$`#^"] [YWURPMJHEB?= : 8 5 3 1/-   !!""##$%%&''(()**++,--,,,,,,,,+++++++++++++++++++++++++++++++++++++++++++,,,,,,,,,,,-------.....//// 0 0 0 0 1 1 1 2 2 2 3 3 4 4 4 5 5 6 6 "#$&'(*+,--..////....-.../ 0 2 4 6 8 ;>ACFHKMPT![+h=} [DxӦ苼V,jI 2q%aXSRRSUWY [!\"^#_$`$`$`$`$`#_#^"] [YWUSPNKIFCA>; 9 7 4 2 0.,  !!""##$$%&&'(()**++,--.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,----------.......///// 0 0 0 0 1 1 1 1 2 2 2 3 3 3 4 4 4 5 5 6 6 6 7 !"$%&()*+,--......-------./ 0 2 4 6 9<?BEHJMOQTZ&c 2qE#^;{QXK2qU=},j"]UQOPQSTVX Z!\"]"^#_#_#_#_#^"^!\ [YXVSQOLJGDB?= : 8 5 3 1/-   !!"##$$%&&'(())*++,,-..-----------------------------------------------..........////// 0 0 0 0 0 1 1 1 1 1 2 2 2 3 3 3 3 4 4 4 5 5 6 6 6 7 7 8 8 "#$%'()*+,,--..----,,,,,,-. 
/ 1 4 6 9<@CFJLOQSVY#_*h 5uEU&b*g%aSA 2q&bXQNMMOPRTVXY [!\"]"^"^"^"^"]!\ [YXVTROMJHEC@> ; 9 6 4 2 0.,  !!""##$%%&''())*++,,-../............-----------......................///////// 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 5 5 5 6 6 6 7 7 7 8 8 9 !"#%&'()*+,,------,,++++++,./ 1 4 7 :=AEHLORTVX Z"]&c,j 4s=}DGD<| 2q(e [SNLKKLNPRTUWY Z [!\!\"]!]!\![ ZYXVTRPNKIFDA?< : 7 5 3 1/-+   !""##$%%&''())*++,,-..//.........................../////////////// / 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 5 5 5 5 6 6 6 7 7 7 8 8 9 9 9 "#$%&()**+,,----,,+++****+,-/ 1 4 7 ;>BFJNQTWY Z!\"^$`'d+i/n 2q 3r 1p-k'd!\TOKIIIJLMOQSUWXY Z [![![ [ [ ZYWVTRPNLIGDB@= ; 8 6 4 2 0.,*  !!""#$$%&&'(()**+,,-..// 0///////////////////////// / / 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 3 3 3 3 3 4 4 4 4 5 5 5 5 6 6 6 7 7 7 8 8 8 9 9 9 : : ; !"#%&'()*++,,,,,,,++**)))**+-/ 1 4 8;?DHLPTWY!\"]#^#_$`%a&c(e(f(e&c#_ ZUOLIGGGHJKMOQSTVWXYZ Z ZZYXWVTRPNLJHEC@>< 9 7 5 3 0.-+)  !!""#$$%&&'(()**+,,-..// 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 4 4 5 5 5 5 5 6 6 6 7 7 7 7 8 8 8 9 9 9 : : : ; ; !"#$%&'()*++,,,,,+++*)))())*+-/ 1 5 8<@EINRVY!\#^$`%a%a%a%a%a$`$`#^!\XTPLIGFEFFHIKMNPRTUVWXYYYXXWUTRQOMJHFCA?< : 8 5 3 1/-,*  !!"##$%%&''()**+,,-..// 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 4 4 5 5 5 5 5 6 6 6 6 7 7 7 7 8 8 8 9 9 9 9 : : : ; ;<< !"#%&'())*+++,,,++**))((((()*,/ 1 5 9=AFKPTX!\#_%a&c'd'd'c&b%a$_"] [XUQNJHFEDDEFGIJLNPRSTVWWWWWWVUTRPOMKHFDB?= ; 8 6 4 2 0.,+)  !!""#$$%&''())*+,,-..// 0 0 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 5 5 5 5 5 5 6 6 6 6 6 7 7 7 7 8 8 8 8 9 9 9 : : : ; ; ;<<<= !"#$%&'()**+++++++**))(('''()*,/ 2 5 9>BGLQV Z#^%a'd(f)f)g)f'd&c$`"] ZWSPMJGEDCCCDEGHJLNOQRTUUVVVVUTSRPOMKIGDB@>; 9 7 5 3 1/-+*( !!""#$$%&&'())*++,-..// 0 0 1 1 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 
4 4 4 4 4 4 5 5 5 5 5 5 5 6 6 6 6 6 6 7 7 7 7 8 8 8 8 8 9 9 9 9 : : : ; ; ;<<<== !"#$%&'(()**++++++**))(''&&''(*,. 2 5 9>CHMSX!\$`'d)f*h+i+i+h)g(e%b#_ [WTPMIGECBBBBCEFHJKMOPRSTTUUUTSSQPNMKIGEC@>< : 8 5 3 1 0.,*)'  !!"##$%&&'(()*++,--.// 0 1 1 2 2 2 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 7 7 7 7 7 7 8 8 8 8 9 9 9 9 9 : : : : ; ;;<<<===> !"#$%&'())*++++++**))(''&&&&'(),. 1 5 :>DINTY"^%b(e*h,j-k-k,j+i)f'd$`!\YUQMJGDCAAAABCDFGIKMNPQRSSSSSSRQPNLKIGECA?< : 8 6 4 2 0.-+)(  !!"##$%%&'(()*++,--.// 0 1 1 2 2 3 3 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 7 7 7 7 7 7 8 8 8 8 8 8 9 9 9 9 9 : : : : ; ; ;;<<<====> !"#$%&'(()**++++++**)(''&&%%%&')+. 1 5 :?DIOTZ#^&c)g-k 1o 2q 1o.m,k*h(e%b"^ZVQNJGDBA@?@@ABDEGIKLNOPQRRRRRQPONLKIGECA?= ; 9 7 5 3 1/-+*(' !!""#$$%&''()**+,--./ / 0 1 1 2 2 3 3 4 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 8 8 8 8 8 8 9 9 9 9 9 : : : : : ; ; ;;<<<<====>>>  !"#$$%&'())**+++++**)(('&&%%%%&')+. 1 5 :>DIOTZ#_'c+h 3sFQC 4s.l+i)f&c#_ [VRNJGDB@?>>??ABCEGIJLMNOPQQQPPONMLJIGECA?= ; 9 7 5 3 1/.,*)'&  !""#$$%&''()**+,--./ / 0 1 1 2 2 3 3 4 4 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 9 9 9 9 9 9 : : : : : ; ; ;;<<<<<===>>>>?!    !!"#$%&'(()**+++++**))('&&%$$$$%&(*- 1 5 9>CIOTZ#_'c-kDRYJ 1p,j)g'c$`![WSNKGDB@>===>?@BCEGHJKMNNOOOOONMLKJHGECA?=; 9 7 5 4 2 0.,+)(&%  !!"##$%&&'())*+,--./ / 0 1 1 2 2 3 3 4 4 5 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 : : : : : ; ; ; ;;;<<<<====>>>>??"!!     !"#$%&''())**+++++**))('&%%$$$$%&'*- 0 4 8=CHNSY"^&c-lT8x 7w,k)g'c$`![WSNKGDA?=<<<<=>@ACEFHIKLMMNNNNMMLKIHFECA?=< : 8 6 4 2 0/-+*('& !!"##$%%&'())*+,--./ / 0 1 1 2 3 3 4 4 5 5 5 7 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 : : : : : : : ; ; ; ;;<<<<<=====>>>>???#""!!!   
!"#$%%&'())**++++++**)(''&%$$###$%'),/ 3 7<AGLRW!\%a+iJK ;z,j)f&c#_ [WRNJGCA>=< ; ; ;<=>@ACDFGIJKLLMMMLLKJIGFDCA?=< : 8 6 4 2 1/-,*)'&%  !""#$%%&'(()*+,,-./ / 0 1 1 2 3 3 4 4 5 5 5 6 8 8 8 9 9 9 9 9 9 9 9 9 : : : : : : : : ; ; ; ; ;;<<<<<<=====>>>>????$##"""!!    !"#$$%&'(()**+++++++**)('&%%$####$%&(+. 2 6 ;@EJPU Z#^'d 4s'dy"^ 4t+h(e%b"^ ZVRGEBC@>< : : : : : ;<>?ACDFGHIJKKKKKKJIHGEDBA?=< : 8 6 4 3 1/.,*)(&%$  !!"#$$%&'(()*+,,-.// 0 1 1 2 3 3 4 4 5 5 6 6 6 9 9 9 9 : : : : : : : : : ; ; ; ; ;;;<<<<<<=====>>>>>????@$$$###""!!!    !"##$%&''()**++++++++**)('&%$$#"""#$%'*- 0 4 9>CHMRW![$_(e 2qAC 6v,j(e&b$_!\XTKMKFA<= ; 9 9 8 8 9 : ;<>?ABDEFHHIJJJJIIHGFECB@?=; : 8 6 4 3 1 /.,+)('%$# !!"##$%&''()*++,-.// 0 1 1 2 3 3 4 4 5 5 6 6 7 7 : : : : : ; ; ; ; ; ;;;<<<<<<<======>>>>>????@&%%%$$$##"""!!!    !""#$%&''())*+++,,,,++**)('&%$##""""#%&)+/ 3 7;@EJOSX![#_&b(f)g(f'c%b$_"]YVJQTSMD= 9 : 8 7 7 7 8 8 9 ;<=?ABCEFGHHHIIHHGFEDCB@?=; : 8 6 5 3 1 0.,+*('&$# !!"##$%&&'()*++,-.// 0 1 2 2 3 3 4 5 5 6 6 6 7 7 8 ; ; ; ;;;<<<<<<<=======>>>>>>?????@'&&&%%%$$$##"""!!!    !""#$%&&'())*+++,,,,,,+**)('&%$#"""!"#$%'*- 0 4 9=BFKOSW Z?#^#_$_#_"^!\YVDMV!\![TI? 9 7 7 6 6 6 6 7 8 9 :<=?@BCDEFGGGGGGFEDCBA@>= ; 9 8 6 5 3 1 0.-+*('&%$#  !""#$%%&'()**+,-.// 0 1 2 2 3 4 4 5 5 6 6 7 7 7 8 8<<<<<<<=======>>>>>>??????@(('''&&&%%%$$$##"""!!!     !""#$%%&'())*++,,,,,,,,++*)('&%$#""!!!"#$&(+. 2 6 :>BGKN 9AGIHC ZYWUSCMY%b'd![NA 9 5 6 5 4 4 5 5 6 8 9 :<=?@ACDDEEFFFEEDDCA@?>< ; 9 8 6 4 3 1 0.-+*)'&%$#"  !""#$$%&'())*+,-../ 0 1 2 2 3 4 4 5 5 6 6 7 7 8 8 8 9========>>>>>>>??????@*))((('''&&&%%%$$$##"""!!!     !""#$%%&'(()*++,,,-----,,+**)(&%$#""!!!!"#%'),/ 2 6 :>B. 4=GPSPI@TRQN@KX*g$aQB 8 4 5 4 3 3 4 4 5 6 7 9 :<=>@ABCDDDDDDDCCBA@>=< : 9 7 6 4 3 1 0.-+*)'&%$#" !!"#$$%&'(()*+,-../ 0 1 2 2 3 4 4 5 5 6 7 7 7 8 8 9 9 9==>>>>>>>???????@++***)))(((''&&&%%%$$$###"""!!      
!""#$%%&'(()*++,,--------,,+*)('&%$#"!!!!!"#%'),/ 2 6 :=- 3?LX#^XMBFLNJCFR#^$`$`QA 6 3 4 7 2 2 2 3 4 5 6 7 9 :<=>?@ABCCCCCCBBA@?>< ; : 8 7 6 4 3 1 0.-+*)(&%$#"! !!"##$%&'(()*+,-../ 0 1 2 2 3 4 4 5 6 6 7 7 8 8 8 9 9 9 :>>>>???????@@,,+++***)))((('''&&&%%$$$###"""!!!     !""#$$%&'(()*++,,--......--,++*)'&%$#"!!  !!"#%'),/ 2 5)+ 1=M#_ [N@KTWRI>HSYVJ< 3 1 3 7 1 1 1 2 3 4 5 6 7 9 :;=>?@AABBBBAA@@?>=< ; 9 8 7 5 4 2 1 0.-+*)(&%$#"!  !""#$%&''()*+,--./ 0 1 2 2 3 4 4 5 6 6 7 7 8 8 9 9 9 : : :??????@@.---,,,+++***))((('''&&&%%%$$$###"""!!!     !!""#$$%&'(()*++,--...////...-,+*)('&%$""!    !"#%')+. 1((- 6ES ZSHAN [ 9xYL? 6DHF> 5 0 0 3 7</ 0 0 1 2 4 5 6 7 9 :;<=>?@@@@@@@??>=< ; : 9 7 6 5 3 2 1/.-+*)(&%$#"!! !""#$%&&'()*+,--./ 0 1 2 2 3 4 5 5 6 6 7 7 8 8 9 9 : : : ; ;?@@///...---,,,++***)))((('''&&&%%$$$###"""!!!!     !!""#$$%&''()*++,--../// 0 0 ///..-,+*)('%$#"!!    !"#$&(*+)'(- 7AFC<=K ZYK= 4 6 8 7 3/. 1 5 9=.// 0 1 2 4 5 6 7 9 : ;<=>>??????>>=< ; : 9 8 7 6 4 3 2 0/.,+*)(&%$#""! !!"#$%%&'()*+,,-./ 0 1 2 2 3 4 5 5 6 7 7 8 8 9 9 9 : : ; ; ; ; 0 0 0///...---,,+++***)))((('''&&%%%$$$###"""!!!!      !!"##$$%&''()*++,--.// 0 0 0 0 0 0 0 0//.-,+*)(&%$#"!      !"$%'/,)''*. 1 1- 5AMTNB 7 0-.--- 0 4 9=@-../ 0 1 3 4 5 6 8 9 : ;<<==>>>>===< ; : 9 8 7 6 5 4 3 1 0/.,+*)(&%$#""! !"#$$%&'()*++,-./ 0 1 2 2 3 4 5 5 6 7 7 8 8 9 9 : : : ; ;;<< 0 0 0///...---,,+++***)))(((''&&&%%%$$$###"""!!!!       !!""##$%%&''()*++,-../ 0 0 0 1 1 1 1 1 1 1 0 0/..-+*)(&%$#"!     !"#$ 4 2/+('&'((, 2 :>; 4.+++,. 1 5 9>AC,--./ 0 2 3 4 5 6 7 9 9 : ;<<<==<<<; ; : 9 8 8 6 5 4 3 2 1 0.-,+*)'&%$#""! "##$%&'()**+,-./ 0 1 2 2 3 4 5 5 6 7 7 8 8 9 9 : : ; ;;<<<< 0 0 0///...---,,+++***)))(((''&&&%%%$$$###""""!!!         !!""##$%%&''()*++,-../ 0 0 1 1 2 2 2 2 2 2 2 2 1 1 0/.-,+)('%$#"!     !" : 9 8 5 2.,*)''(*++)))+/ 1 4 8=ADFG+,,-. / 1 2 3 4 5 6 7 8 9 : : ; ; ;; ; ; ; : : 9 8 8 7 6 5 4 3 1 0/.-,+)('&%$#""! "##$%&'(()*+,-./ 0 1 1 2 3 4 5 5 6 7 7 8 9 9 : : ; ;;<<<<== 1 0 0 0//...---,,,++***)))((('''&&%%%$$$####"""!!!!         
 !!!""##$%%&''()**+,-../ 0 1 1 2 2 3 3 3 4 4 4 3 3 3 2 1 1 0/.,+*('&%#"!!    !BA@> ; 7 4 2+)(((()*,. 6 9=AEHJK)*+,-./ 0 1 2 3 4 5 6 7 8 9 9 9 : : : : : : 9 9 8 7 7 6 5 4 3 2 1 0/.,+*)('&%$#""! "#$%&''()*+,-./ 0 1 1 2 3 4 5 5 6 7 8 8 9 9 : : ; ;<<<<==== 1 0 0 0///...--,,,+++***))((('''&&&%%%$$$###""""!!!!                  !!!""##$$%%&''()**+,-.// 0 1 2 2 3 3 4 4 5 5 5 5 5 5 4 4 3 2 2 1 /.-,*)(&%$#"!     IIHFC? 2 2 2 2 2 1 1 2 2 3 3>AEILNOO))*+,-./ 0 1 2 3 4 5 6 7 7 8 8 9 9 9 9 9 8 8 7 7 6 6 5 4 3 2 1 0/.-,+*)('&%$#""! "#$%&&'()*+,-./ 0 1 1 2 3 4 5 5 6 7 8 8 9 9 : : ;;<<<====>> 1 1 0 0 0//...---,,,++***)))(('''&&&%%%$$$####""""!!!!!!            !!!!"""##$$%%&''()**+,-../ 0 1 2 3 3 4 5 5 6 6 6 6 6 6 6 6 6 5 4 4 3 2 1/.-+*)'&%$#"!    YUOKHD ;=>???>< ; :DHKORTU'()*++,-. 0 1 2 2 3 4 5 6 6 7 7 7 8 8 8 7 7 7 6 6 5 5 4 3 2 1 0 /.-,+*)(''&%$#"!! #$%%&'()*+,-./ 0 0 1 2 3 4 5 6 6 7 8 8 9 : : ; ;<<<====>>>> 1 1 1 0 0 0//...--,,,+++***))((('''&&&%%%$$$####"""""!!!!!!!!!!!!!!!!!"""###$$%%&''())*+,-../ 0 1 2 3 3 4 5 6 6 7 7 8 8 8 8 8 8 7 7 6 6 5 4 3 2 1/.-+*)'&%$#"!!         !!EHJJJHEB$$LPV.m&&'(()*+,-./ 0 1 2 3 3 4 5 5 6 6 6 6 6 6 6 6 6 5 5 4 4 3 2 1 0 0/.-,+*)('&%$$#"!! $$%&'()*+,-./ / 0 1 2 3 4 5 6 6 7 8 8 9 : : ;;<<===>>>>>>? 2 1 1 1 0 0 ///...--,,,++***)))((('''&&&%%%$$$####""""""!!!!!!!!!!!"""""##$$$%%&''())*+,--./ 0 1 2 3 4 4 5 6 7 7 8 8 9 9 9 : : : 9 9 9 8 7 6 5 4 3 2 1 0.-+*)'&%$#""!!        !P Z%aVO""##$$$%&&'(()*+,-../ 0 1 2 3 3 4 4 5 5 5 5 5 5 5 5 5 4 4 3 3 2 1 0 0/.-,+*))('&%$##"!  #$%&'()*+,-../ 0 1 2 3 4 5 6 6 7 8 8 9 : : ;;<<===>>>????? 2 2 1 1 1 0 0 ///..---,,,++***)))(('''&&&&%%%$$$$####""""""""""""""""####$$%%%&''(()*+,,-./ 0 1 2 3 4 4 5 6 7 8 9 9 : : ; ; ;;;; ; ; : : 9 8 7 6 5 4 3 1 0/-,*)('&%$#""!!!!!!!!!!!!"""###$$%&&'(()*+,,-./ 0 0 1 2 2 3 3 4 4 4 4 4 4 4 4 3 3 3 2 1 1 0 //.-,++*)('&%%$#""!  $%&'()*+,,-./ 0 1 2 3 4 5 6 6 7 8 9 9 : : ;<<===>>>??????? 
3 2 2 1 1 1 0 0 ///..---,,,++***)))(('''&&&&%%%$$$$#######""""""""#####$$$%%&&''(()*++,-./ 0 1 2 3 3 4 5 6 7 8 9 : : ;<<========<< ; : 9 8 7 6 5 3 2 1/.-+*)('&%$$##"""""""""""###$$%%&&'(()*++,-.// 0 0 1 2 2 2 3 3 3 3 3 3 3 3 2 2 1 1 0 0//.-,,+*)(''&%$$#"!! %&'())*+,-./ 0 1 2 3 4 5 6 6 7 8 9 9 : ; ;<<==>>>?????@@@@ 3 3 2 2 1 1 1 0 0 ///..---,,,++***)))((''''&&&%%%%$$$$$##############$$$%%%&&''(()**+,-./ / 0 1 2 3 4 5 6 7 8 9 : ;<<=>>>???????>>=< ; : 9 8 7 6 4 3 1 0/.,+*)('&&%$$$########$$$%%%&&'(())*+,,-..// 0 0 1 1 2 2 2 2 2 2 2 2 2 1 1 0 0 //..-,,+*)(('&%%$#""!  %&''()*+,-./ 0 1 2 3 4 5 5 6 7 8 9 9 : ;;<<==>>????@@@@@@@ 4 3 3 2 2 2 1 1 0 0 0//...--,,,++***)))((('''&&&&%%%%$$$$$$$$$$$$$$$$$%%%%&&''(()**+,,-./ 0 1 2 3 4 5 6 7 8 9 : ;<==>??@@AAAAAAA@@?>==; : 9 8 7 5 4 3 1 0/.,+*))(''&&%%%%%%%%%%&&&''(())**+,,--../ / 0 0 0 1 1 1 1 1 1 1 1 0 0 0//..--,++*))('&&%$##"!!  &&'()*+,-./ 0 1 2 3 4 5 5 6 7 8 9 9 : ;;<==>>>???@@@@@AAAA 4 4 3 3 3 2 2 1 1 0 0 0//...--,,,++***)))(((''''&&&%%%%%%$$$$$$$$$$$%%%%&&&''(())*++,-./ 0 1 1 2 3 4 6 7 8 9 : ;<=>>?@AABBCCCCCCCBBAA@?>=< : 9 8 7 5 4 3 1 0/.-,+**)(((''''''''''((())***++,,--../// 0 0 0 0 0 0 0 0 0 0///..--,,+**)(('&&%$$#""!! %&'()*+,-./ 0 1 2 3 4 5 5 6 7 8 9 9 : ;;<==>>???@@@@AAAAAAA 5 4 4 4 3 3 2 2 1 1 1 0 0///..---,,+++***)))((('''&&&&&%%%%%%%%%%%%%%%&&&&''(())**+,--./ 0 1 2 3 4 5 6 7 8 9 :;<=>?@ABCCDDEEEFFEEEDDCBA@?>=< : 9 8 7 5 4 3 2 1 0/.-,,+***)))))))))))***+++,,---...////////////...--,,++**)(('&&%$$##"!!  &'()*+,-./ 0 1 2 3 4 4 5 6 7 8 9 9 : ;<<==>>???@@@AAAAAAAAB 5 5 5 4 4 3 3 2 2 2 1 1 0 0 0//..---,,,++***)))((((''''&&&&&&&%%%%%&&&&&&'''(())**+,,-./ 0 0 1 2 3 4 5 6 8 9 : ;<=>?@ABCDDEFFGHHHIHHHGFEDDCBA?>=< ; 9 8 7 6 5 3 2 1 0 0/.--,,,++++++++++++,,,,----...............---,,++**))(('&&%%$##""!  
&'()*+,-./ 0 1 2 3 4 4 5 6 7 8 9 9 : ;<<==>>??@@@AAAABBBBBBB 6 6 5 5 4 4 4 3 3 2 2 1 1 1 0 0//...--,,,+++***)))(((('''''&&&&&&&&&&&''''((())**++,--./ 0 1 2 3 4 5 6 7 8 9 : ;<=?@ABCDEEFGHIJKMNOONMLJIGFEDCBA@>=< ; : 8 7 6 5 4 3 2 1 1 0//..----,,,,,,,,,-------..........----,,,++**))((''&&%$$##""!!  '()*+,-./ 0 1 2 2 3 4 5 6 7 8 9 9 : ;<<==>???@@AAAABBBBBBBBB 7 7 6 6 5 5 4 4 3 3 3 2 2 1 1 0 0 0//...--,,,+++***)))((((('''''''''''''''((()))**+,,-../ 0 1 2 3 4 5 6 7 8 9 :;=>?@ABCDEFGHIKMOSX!\$`%a$a"^YUPMJHFEDCBA@>=< ; : 9 7 6 5 5 4 3 2 1 1 0 0//.....-----------------------,,,+++**)))((''&%%$$##""!!  ()*++,-. / 0 1 2 3 4 5 6 7 8 9 9 : ;<<=>>??@@@AAABBBBBCCCCCC 8 7 7 6 6 6 5 5 4 4 3 3 2 2 2 1 1 0 0 ///..---,,,+++***))))(((((((''''(((((())))**++,--.// 0 1 2 3 4 5 6 7 8 9 :;=>?@ABCEFGHIJLNSY&c 0o<{GNOI>~ 2q(e ZSMJHFEDCB@?>=< ; : 9 8 7 6 5 4 3 3 2 1 1 0 0 0///........---------,,,,,+++***)))((''&&%%$$##""!!  (()*+,-./ 0 1 2 3 4 5 6 7 8 9 9 : ;<<=>>??@@AAABBBBCCCCCCCCC 9 8 8 7 7 6 6 5 5 5 4 4 3 3 2 2 1 1 1 0 0///..---,,,+++***))))))(((((((((())))***++,,--./ 0 0 1 2 3 4 5 6 7 8 9 :;=>?@ABCEFGHIJLNS [*g :zS6u^ƅؕ߂Z3rQ 8x(eYQLIGFDCBA@?><; : 9 8 7 6 6 5 4 3 3 2 2 1 1 0 0 ////.....-----,,,,,++++***))(((''&&%%$$###""!!  ()*+,-./ 0 1 2 3 4 5 6 7 8 9 9 : ;<<=>>??@@AAABBBCCCCDDDDDDD : 9 9 8 8 7 7 6 6 5 5 5 4 4 3 3 2 2 1 1 1 0 0//...---,,,+++*****)))))))))))))****+++,,-../ 0 0 1 2 3 4 5 6 7 8 9 : ;<=?@ABCDEFHIJKMPV%a 4sO<|^'d@+i ZQLIGFEDBA@?>=< ; : 9 8 7 6 5 4 4 3 2 2 1 1 0 0 ///....----,,,++++***)))(((''&&%%%$$##""!!!  ()*+,-./ 0 1 2 3 4 5 6 7 8 8 9 : ;<<=>>??@@AABBBCCCDDDDDEEEEE ; : : 9 9 8 8 7 7 6 6 5 5 5 4 4 3 3 2 2 1 1 1 0 0//...---,,,++++*****************+++,,--../ 0 0 1 2 3 4 5 6 7 8 9 : ;<=>?@BCDEFGHIJLMQW&b 5uRBBR 5u%bVOKIGFDCBA@?>=< : 9 9 8 7 6 5 4 4 3 2 2 1 1 0 0//...---,,,+++***)))(('''&&&%%$$###""!!   
)*+,-./ 0 1 2 3 4 5 6 7 8 8 9 : ;<<=>>??@AAABBCCCDDDEEEEEFFFF< ; ; : : 9 9 8 8 7 7 6 6 5 5 5 4 4 3 3 2 2 1 1 1 0 0///..---,,,,++++++********+++++,,,--../ 0 0 1 2 3 3 4 5 6 7 8 9 :;<>?@ABCDEFGHIJLMPT!],j>~!\H\,jG 1p$`VOKIGFEDCA@?>=< ; : 9 8 7 6 5 5 4 3 2 2 1 0 0//..---,,+++***)))(('''&&%%%$$###""!!!   )*+,-./ 0 1 2 3 4 5 6 6 7 8 9 : ;<<=>>?@@AABBBCCDDDEEEFFFFGGGG=<<; ; : : 9 9 8 8 7 7 6 6 5 5 5 4 4 3 3 2 2 1 1 1 0 0///...---,,,,,++++++++++++,,,,--.../ 0 0 1 2 2 3 4 5 6 7 8 9 : ;<=>?@ABCDFGHIIJKLNQU!\)f 4tEX,j8w;{3r&bO=|.m$`WQMJIGFEDCA@?>=< ; : 9 8 7 6 5 4 4 3 2 1 1 0 0/..--,,+++**)))(('''&&%%%$$$##"""!!   *+,-./ 0 1 2 3 4 4 5 6 7 8 9 : ;;<=>>?@@AABBBCCDDEEFFFGGGHHHHH>==<<; ; : : 9 9 8 8 7 7 7 6 6 5 5 4 4 3 3 2 2 2 1 1 0 0 0//....----,,,,,,,,,,,,,,,---..// 0 0 1 1 2 3 4 4 5 6 7 8 9 : ;<=>?@ABCDEFGHIJKLMNPRV [$`)f-k 0o 1o/m+i'c"]XSOLJIHFEDCBA@?>=< ; : 9 8 7 6 5 4 3 2 2 1 0 0/..--,++***))(('''&&%%%$$$##"""!!!   *++,-. / 0 1 2 3 4 5 6 7 8 9 : ;;<=>>?@@AABBCCCDDEEFGGHHIIIIJJI??>>==<< ; ; : : 9 9 8 8 7 7 6 6 5 5 4 4 3 3 2 2 2 1 1 0 0 0///....-------,,,,------...// 0 0 1 1 2 2 3 4 5 6 6 7 8 9 : ;<=>?@ABCDEFGHIIJKKLMNOPQSTUUTSQONLKJHGFEDCBA@?>=< ; : 9 8 7 6 5 4 3 2 2 1 0//.--,,+**)))((''&&&%%$$$##"""!!!   *+,-./ 0 1 2 3 4 5 6 7 8 9 : :;<=>>?@@AABBCCDDEEFFGHIIJJKKKLKK@@??>>==<< ; ; : : 9 9 8 8 7 7 6 6 5 5 4 4 4 3 3 2 2 1 1 1 0 0 0///......---------.....// 0 0 0 1 2 2 3 3 4 5 6 6 7 8 9 : ;<=>?@ABBCDEFGHHIJJKKLLLMMMMMMLKKJIIHGFEDCBA@?>=< ; : 9 8 7 6 5 4 3 2 1 1 0/..-,,++*))(('''&&%%$$$###""!!!   *+,-./ 0 1 2 3 4 5 6 7 8 9 : : ;<==>??@AABBCCDDEEFGHIIJKLMMNNNNNBAA@@??>==<<; ; : : 9 9 8 8 7 7 6 6 6 5 5 4 4 3 3 2 2 2 1 1 1 0 0 0////.............//// 0 0 0 1 1 2 2 3 4 4 5 6 6 7 8 9 : ;;<=>?@ABCCDEFFGHHIIJJJJJKJJJJIIIHGGFEDCCBA@?>=< ; : 9 8 7 6 5 4 3 2 1 0 0/.--,++*))((''&&%%%$$###"""!!!   
+,-./ 0 1 2 3 4 5 6 7 8 8 9 : ;<==>??@AABBCCDDEFFGHIJKMNOOPQQQQPCBBAA@@??>>==<< ; ; : : 9 9 8 8 7 7 6 6 5 5 4 4 4 3 3 2 2 2 1 1 1 0 0 0 0/////////////// / 0 0 0 1 1 2 2 3 3 4 4 5 6 6 7 8 9 9 : ;<==>?@ABBCDDEFFGGHHHHHIIHHHHGGFFEEDCBBA@?>=< ; : 9 8 7 6 5 4 3 2 2 1 0/.--,++*))((''&&%%$$###"""!!!   +,-./ 0 1 2 3 4 5 6 6 7 8 9 : ;<==>??@AABBCCDDEFGHIJKMNOQRSTTTTTTDDCCBBAA@@??>==<<; ; : : 9 9 8 8 7 7 6 6 6 5 5 4 4 3 3 3 2 2 2 1 1 1 0 0 0 0 0 0 /////// / 0 0 0 0 0 1 1 1 2 2 3 3 4 4 5 6 6 7 7 8 9 : : ;<==>?@@ABBCDDEEEFFFFFFFFFFFEEDDCBBA@??>=< ; : 9 8 7 6 5 4 4 3 2 1 0/..-,++*))((''&&%%$$###""!!!   +,-./ / 0 1 2 3 4 5 6 7 8 9 : ;<<=>??@AABBCCDEEFGHIKLNOQSTVWXXYYXXFEEDDCCBBA@@??>>==<< ; ; : : 9 9 8 8 7 7 6 6 5 5 5 4 4 3 3 3 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 3 3 4 4 5 5 6 6 7 8 8 9 : : ;<<=>>?@@AABBCCCDDDDDDDDDDCCBBAA@??>=<< ; : 9 8 7 6 5 5 4 3 2 1 0//.-,,+*))((''&%%$$$##"""!!!   +,-./ 0 1 2 3 4 5 6 7 8 9 : ;;<=>>?@@ABBCCDEEFGHIKMOQSUWY Z!\"]"]"^"^"]!\GGFFEEDDCBBAA@@??>>==<; ; : : 9 9 8 8 7 7 7 6 6 5 5 4 4 4 3 3 3 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 3 3 3 4 4 5 5 6 6 7 7 8 8 9 : : ;;<==>>??@@@AAABBBBBBBBAAA@@??>>=<< ; : 9 9 8 7 6 5 4 4 3 2 1 0//.-,,+**)(('&&%%$$###""!!!   +,-./ 0 1 2 3 4 5 6 7 8 9 9 : ;<=>>?@@ABBCCDDEFGHJKMORTWY!\"^$`%a&c'c'c'c&c%bIHHGGFFEDDCCBBAA@@?>>==<<; ; : : 9 9 8 8 7 7 6 6 6 5 5 5 4 4 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 5 5 6 6 7 7 8 8 9 9 : : ; ;<<===>>??????@@@?????>>==<<; ; : 9 9 8 7 6 6 5 4 3 2 2 1 0//.-,,+*))(('&&%%$$###""!!!   ,-./ 0 1 2 3 4 5 6 7 7 8 9 : ;<==>??@AABCCDDEFGHJKMPRUX![#^%a'd)f*h+i,j,j,j+i*hJJIIHHGGFFEDDCCBBAA@??>>==<< ; ; : : 9 9 8 8 7 7 6 6 6 5 5 5 4 4 4 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 4 4 4 5 5 5 6 6 6 7 7 8 8 9 9 9 : : ; ;;<<<============<<<; ; ; : : 9 8 8 7 6 6 5 4 4 3 2 1 1 0/..-,++*))(('&&%%$$###""!!!   
,-./ 0 1 2 2 3 4 5 6 7 8 9 : ;<<=>??@AABBCDDEFGHJKMPSVY"]%a'd*h-k/m 1o 2q 3r 3r 2q 1p 0oLKKJJIIHHGGFEEDDCCBBA@@??>>==<< ; ; : : 9 9 8 8 7 7 7 6 6 5 5 5 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 5 5 5 5 6 6 7 7 7 8 8 8 9 9 9 : : : : ; ; ; ; ; ; ; ; ; ; ; ; : : : 9 9 9 8 8 7 7 6 6 5 4 4 3 2 2 1 0 //.--,++*))(('&&%%$$###""!!!   ,,-./ 0 1 2 3 4 5 6 7 8 9 : :;<=>>?@@ABBCCDEFGHIKMPSV Z#^&c)g-k 0o 3r 6u 8x :y ;z ;z :z 9x 7wNMMLLKJJIIHHGGFEEDDCCBAA@@??>>==<< ; ; : : 9 9 8 8 7 7 7 6 6 6 5 5 5 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 5 5 5 5 6 6 6 6 7 7 7 7 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9 8 8 8 8 7 7 6 6 6 5 5 4 3 3 2 2 1 0 0/..-,,++*))(('&&%%$$$##""!!!   ,-./ 0 1 2 3 4 5 6 7 8 8 9 : ;<==>?@@ABBCCDEEFGIKMPSV [#_'d+i/n 3s 8w;{?~ACDDCB?OONNMMLLKJJIIHHGGFEEDDCCBAA@@??>>==<; ; ; : : 9 9 8 8 7 7 7 6 6 6 5 5 5 5 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 6 6 6 6 5 5 4 4 4 3 3 2 1 1 0 0//.--,,+**))((''&%%%$$##"""!!!   ,-./ 0 1 2 3 4 5 5 6 7 8 9 : ;<<=>??@AABBCDDEFGHJLORV Z#_'d,j 1p 6u ;{@EHLNOONLIQQPPONNMMLLKKJIIHHGGFEEDDCBBAA@@??>>==<< ; ; : : 9 9 8 8 8 7 7 7 6 6 6 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 4 4 4 3 3 3 2 2 1 1 0 0//..--,++**))((''&&%%$$###""!!!    
,-./ / 0 1 2 3 4 5 6 7 8 9 : :;<=>>?@@ABBCCDEFGHJLNQUY#^'d,j 1p 7w=}DJOTX Z![![ ZXURelease_v0.3/kernels/compiler_clod.cl000066400000000000000000000051461223142177000200070ustar00rootroot00000000000000typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod inline vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } inline uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) #define time 1.f inline float f(vec3 o) { float a=(sin(o.x)+o.y*.25f)*.35f; o=(vec3)(cos(a)*o.x-sin(a)*o.y,sin(a)*o.x+cos(a)*o.y,o.z); return dot(cos(o)*cos(o),(vec3)(1.f))-1.2f; } // XXX front end does not inline this function inline __attribute((always_inline)) vec3 s(vec3 o,vec3 d) { float t=0.0f; float dt = 0.2f; float nh = 0.0f; float lh = 0.0f; for(int i=0;i<50;i++) { nh = f(o+d*t); if(nh>0.0f) { lh=nh; t+=dt; } } if( nh>0.0f ) return (vec3)(.93f,.94f,.85f); t = t - dt*nh/(nh-lh); vec3 exyy=(vec3)(0.1f,0.0f,0.0f); vec3 eyxy=(vec3)(0.0f,0.1f,0.0f); vec3 eyyx=(vec3)(0.0f,0.0f,0.1f); vec3 p=o+d*t; vec3 n=-normalize((vec3)(f(p+exyy),f(p+eyxy),f(p+eyyx))+(vec3)((sin(p*75.f)))*.01f); return (vec3)(mix( ((max(-dot(n,(vec3)(.577f)),0.f) + 0.125f*max(-dot(n,(vec3)(-.707f,-.707f,0.f)),0.f)))*(mod (length(p.xy)*20.f,2.f)<1.0f?(vec3)(.71f,.85f,.25f):(vec3)(.79f,.93f,.4f)) ,(vec3)(.93f,.94f,.85f), (vec3)(pow(t/9.f,5.f)) ) ); } #if 0 // XXX vector type in the function arguments not supported yet __kernel void compiler_clod(__global uint *dst, vec2 resolution, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); //vec2 p = -1.0f + 2.0f * gl_FragCoord.xy / resolution.xy; vec2 p; p.x = 
-1.0f + 2.0f * gl_FragCoord.x / resolution.x; p.y = -1.0f + 2.0f * gl_FragCoord.y / resolution.y; vec4 gl_FragColor=(vec4)(s((vec3)(sin(time*1.5f)*.5f,cos(time)*.5f,time), normalize((vec3)(p.xy,1.0f))),1.0f); OUTPUT; } #else __kernel void compiler_clod(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); //vec2 p = -1.0f + 2.0f * gl_FragCoord.xy / resolution.xy; vec2 p; p.x = -1.0f + 2.0f * gl_FragCoord.x / resx; p.y = -1.0f + 2.0f * gl_FragCoord.y / resy; vec4 gl_FragColor=(vec4)(s((vec3)(sin(time*1.5f)*.5f,cos(time)*.5f,time), normalize((vec3)(p.xy,1.0f))),1.0f); OUTPUT; } #endif Release_v0.3/kernels/compiler_clod_function_call.cl000066400000000000000000000050551223142177000227060ustar00rootroot00000000000000typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) #define time 1.f float f(vec3 o) { float a=(sin(o.x)+o.y*.25f)*.35f; o=(vec3)(cos(a)*o.x-sin(a)*o.y,sin(a)*o.x+cos(a)*o.y,o.z); return dot(cos(o)*cos(o),(vec3)(1.f))-1.2f; } // XXX front end does not inline this function vec3 s(vec3 o,vec3 d) { float t=0.0f; float dt = 0.2f; float nh = 0.0f; float lh = 0.0f; for(int i=0;i<50;i++) { nh = f(o+d*t); if(nh>0.0f) { lh=nh; t+=dt; } } if( nh>0.0f ) return (vec3)(.93f,.94f,.85f); t = t - dt*nh/(nh-lh); vec3 exyy=(vec3)(0.1f,0.0f,0.0f); vec3 eyxy=(vec3)(0.0f,0.1f,0.0f); vec3 eyyx=(vec3)(0.0f,0.0f,0.1f); vec3 p=o+d*t; vec3 
n=-normalize((vec3)(f(p+exyy),f(p+eyxy),f(p+eyyx))+(vec3)((sin(p*75.f)))*.01f); return (vec3)(mix( ((max(-dot(n,(vec3)(.577f)),0.f) + 0.125f*max(-dot(n,(vec3)(-.707f,-.707f,0.f)),0.f)))*(mod (length(p.xy)*20.f,2.f)<1.0f?(vec3)(.71f,.85f,.25f):(vec3)(.79f,.93f,.4f)) ,(vec3)(.93f,.94f,.85f), (vec3)(pow(t/9.f,5.f)) ) ); } #if 0 // XXX vector type in the function arguments not supported yet __kernel void compiler_clod(__global uint *dst, vec2 resolution, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); //vec2 p = -1.0f + 2.0f * gl_FragCoord.xy / resolution.xy; vec2 p; p.x = -1.0f + 2.0f * gl_FragCoord.x / resolution.x; p.y = -1.0f + 2.0f * gl_FragCoord.y / resolution.y; vec4 gl_FragColor=(vec4)(s((vec3)(sin(time*1.5f)*.5f,cos(time)*.5f,time), normalize((vec3)(p.xy,1.0f))),1.0f); OUTPUT; } #else __kernel void compiler_clod(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); //vec2 p = -1.0f + 2.0f * gl_FragCoord.xy / resolution.xy; vec2 p; p.x = -1.0f + 2.0f * gl_FragCoord.x / resx; p.y = -1.0f + 2.0f * gl_FragCoord.y / resy; vec4 gl_FragColor=(vec4)(s((vec3)(sin(time*1.5f)*.5f,cos(time)*.5f,time), normalize((vec3)(p.xy,1.0f))),1.0f); OUTPUT; } #endif Release_v0.3/kernels/compiler_clod_ref.bmp000066400000000000000000006000661223142177000210250ustar00rootroot00000000000000BM66('$$!O56433UƨXͮ[Ӵ]׷]ٸ]ٸ]׷\յ[Դ99¢9ģ:ƥ;ȧ<˪<ά=Ю=ѯ>ѯ=ѯ=ϭ`߽_ܻ_ݼ`bdd>Ӱ=Ю=Ю>Ӱ?ֳ?׳db`b?Ա>ӱ<ͫ:ť]ٸ_ܻ\׶Wˬ566QLM1,{?|B?|"s`"q_7m/l\ZK_P&VI!LAL@<2A8@63+@74-+$/)#*$"-&&!2+ "/)&!703-D:@7E;)UI WJ6rb'k\C{/pN9XA_ȬGǩhټJ˭mNҳOѳqtux^պzվ}±v! "#UG-jZlZkY5|j8p"ta!q_!r_!q^lZ0o^.m\0o_2uc3xf4yf4yg5{i6l8p:s$}h$|h#ye!s`lZfUbR`P`PbRdSdTeUeUeUfUfUgVhWjXjXjXiWfUbR\MUGM@D9=36-3+2*4+6-9/;1<2;2:07.4+0( +$ (! $ !                            
$!# 1,2-# 4579:ť:ƥ:Ť9¢8899ã:Ʀ_ݼ`aaaaaab=ϭ=ϭ<ͫ<ͫ=ϭ>Ұ?ղ?ղcbab>Ӱ>Ӱ=ά;ȧ;ȧ`߽`޽[ҳTĦ576MJO2+x&kDB!p]!m[%|g1p`([MdTWID:'XKF:4,#PD7.5,C9 "4,#*$("*%0(*$0) "2,/)6/C9;3OC#G>2i[%gX5o`0u.m6;UCcѴElLвlpQַSָVغXչ\׻bջdz$""  # 6.\M1r`!s`!p]4yg8o5|jiXiXlZkYfU/m\1ra5|i8p:r:r9q9q9q9r9q8p6l n\gV`P\M\M`PeUkY o\!q^!r_!q^ o\ m[jYiWgVfUdTbR`P\LVHNAF:=35,0( .'0(3+8/>4B7C8C8A6>49/3* ,% & !                         !'$1,'$$ )%60)%S1269¢:Ĥ[ԴZбYίYϰ[ӳ:Ʀ<˪=ϭ=Ю=Ю=Э=ά<ͫ<̫<ͫ=ϭbbbbcc=Ю<˪;ǧ;ɨ<ͫa^ڹX˭TĦ785KJP2*w&jDC6}k jY%{g5yg([M^OaQB7#OCQD7. H>!J?/(?5/()#/($4, #91 $5. 3,/)=4;3E;$KAE;3l]#`R8vf.pE}5V[BeԷI˭lmOӴrtuwy^Ҹ|{˷|}¾! ""PDYK0q` o\ n\eT4zh4yg/m]cSkZ!q^!p^ n\!q^9q=y>|>z;v9p6l5|j5|i5|i4zh3vd0p_.k[_O`PfU n\"ub#zf$|g${g#ye"ta n[gVbR^O\M\L[LYJVHQDJ>B7902* .& .&1)8/?5E:J>L?K>H<B7;12) )" !                     ! &#-)$! MN4899878:ť<̪=Э>ѯ=Ѯ=ϭ<̪_ݼ_ܻ`޽bccb<ͫ;ȧ:ƥ;ɨ=άa^ۺZѱWɫWˬ873/LPM*w'n*vB8o!n\#vc"p^,dU+bT^OM@"MA%UIB8>4!J?=57.4+,&-&)#3+-&1*/)+%+%,%70<4<3 C:F<&OD YL5pa7td,{iI6R@FƨgػIɫLѲpqSٺUٻW׺Zֺ^ؼzdɲ|ɶms!# $!"!" '[MRE/m]/m]kY_O1q`4{h2tc1q`!q^${g%j$}h$|g%j>{?}>{:t6~k2vd1ra1tb3we4zh5{h4yg2vd2tb2uc4{h8o%~i&l&l%j$zf!r_iW_OWISERESEUGUGTFPCJ>C8:04+0(2*8.@5H<OBSESFPCK?C88/ .& #                     ",'# +&/**&-)!:460-Wʬ^ڹ^ڹ\׶[ӳ[Ӵ]׷^ڹ^ۺ^ں^ۺ_ݼabbb`߾;ȧ:Ĥ:ƥ<ʩ=ͬ=ά_ۺZѱWˬYϰ[ӳ8510NNI*w*v*u>y9p8o#ta jYcS.iZ)_QOBPC#PD I>G<?5?6C9.'4-1*)#5--&5-("6/ "92*$"H>5-!D;K@!C:!^P6sc$bTAvL8S>DiݿlO׷QٹSٺuwx{`ֻb͵|ӽ|ǵm||!"!#"  91'[MQC/m\,hX lZhViX8n9q8o:s&n(q'p%k$|h%}i%j;v8p5{h1sa0o_1sa4zg7m9q:s9r8p8o8p:sK?OBSEUGVHTFOBG;?5907.90@5I=RDXIZKXIREJ=?42* %                        ".)-)$!3.-)+' 81!;4'$ M,}26VȪUŧTĦWʬ\յ`߽bcc`:ť88:Ĥ<˪=ϭ<ͫ;ǦZбWˬYί\յ\յ6212PKDD-~*w$zf8m56/F< B9F<0gX"`R$eVAw2y8WBHʬKҳNԵQعtuwy{}}{վ~Ծyn{ &!"!   ' #QE*bS]NjY2tb6k#ye#xd%j?}>|=z>z?|&n%}i"vb!s`"ub#xd#xd"ta5{i4zh6}j9p{?}>|=y;t9q8p9q:r9r7m4yf/n^*bT%XJ"QE"OC#SFQDWH[L\M[LVHOBG;A6>4B7I=RDYJ]N]MXIOBD96- (!                       
'$ "# /+&# ,(# "   EG499ã889;Ʀ;ȧ:ƥ9¢999ã;ȧ<ͫ<̫:ť8WʬYί[Դ\ֶYί31242HAEI+x#ta"q_=y&WJ8/8/F; '!6-4-1*90,&70 $6/*$<36.<3A8A8I>+[O$gW7ue*ve.oNU^ƫiݾqsUۼWڼYټz{`ؼbԺ}{Ҽ{ʶ|ñls¿ &""! H<+cT1ra5{h"ub:s;u>z'n&n'p?}=y;t:r:r9p"ua!r_"s`#xd%~i%k%k&k&l&n?}@@?}=x9q6~k4yg4yf4zh5{h4yg1sb.k[*bS'[M%VI%VI'ZM*aS,hX.l\/n].l\^OWHOBH<F:I=PCWI\M]N[LREG;:0 +$                       %!3-3-(%-(60*& -)"1,1,     %UHAE3654468:Ĥ;Ʀ;Ǧ;Ǧ:ƥ:Ť97X̭Yϰ[Դ\յYϰSN1441EAEI*w#ub"q^&k<3$LB \N4m^3l](n^-~kMXfغMԵҼ`պbڿ~}fиgȳ|ʷzx~yſ"!  $PC.k["ta9q$}h%k>{481RF-`S.`S#bS<~mE~R\èI˭ѻл;qcӺdҹ~gͶf®zŲ{||#      "OCZK,fW"ua4zh:s"ta$|g9r7m9p$|g#ze${f%k>{?|?~ACDDC*w*w)v(s'o%j${f#wd"ub!s`!p] m[2ud2tc2uc3xf5{i5|i4zh4yf3we2ud2uc2vd3xe4yg4zh3xf1sb.l\*bS&ZL#RF"OCI=MAQDREOBH<<2 /'                  &#-(71-($!-(1+&"%! +&($#        *$$zf"taESUŧS23455668\ֶ]ٸ\յX˭SPPR541-,|-~GC=x;t&k%}h"q^3uc4yg2tcWIRD\M(\N!K@NBQD>5C9C8+$>5?6+%?5+%4,("3,/(=51* C:;3 C:I?*XL!]O6rb=pJVD¥и̸ʺ˻ͼγFYU}}gϸz̷f|Ƴ|nqw     $K>)`Q(^P n\/n]6k"s`!r_%j;t;t?}B)u)v+{,~,}EDDCB@>z=x%k%j$}h$zf#wd"vc#vc#wd#xd#zf${f${f$zf:s:t:s:r9q8p7m6~k5{h4yg3we1sa.l\+dU'\N$TG!MAI>I>E:H<H<E9;1 /' #                 *&0+)%",'1,# "$! 
3-#?81,&#3-0+       "9q%|h;sGLKJLQVȪYϰ[Դ\ֶ\ֵZбUƨQOPSUƨTæ1-,|./G?{9p{>z(r+z+{EGHG+z+z+|*y(s'o=z>|?}>{XJdT7l9r(r(sCFE+{,~+|*wBCB@~?})t*w)v)t)u*y,}GFFFGHHFD*w)u)u)u(s'n%j$}h$}h:s8p6~k3vd.m\+dU'\N$UH#RF!NC K?F<@7;27.6-6.1) ,% $                    " 82"<61,,(4.0+!($&"($#?8">7($%".)&"         =4(\NVHjYFNNLMPRPMLMPT¥UŨTæPJD+x.0/+x%}i8o=xA41)B82+,%80+%:10)=45.2+D:@6&QFVI#bS;}l1wTѻзɷɷɴƦhyvJRQ^bfzƳyvj|~    %WJ(^P!r_#xd>{AA*w)u(sBA@~A*w*x*x+|-JJKLMM////.-,|FGFC@~G<G=G<C9=44,.'+$*$ )" $                     ($%" /*1,"!-(,'$!*&;4%D<3-%"/+/* "'#       !MB1q`QCYK;tBB?}BHLNPRT¥TæSOKFEH//-})s&k&k>z=x8n5zg"q^"q^hW]N,dU,eV'ZLK?UGUGF<D:H=4,91"MB2*5->5'!8/-&8//(=41*91<3#KAG<(TH,]P5o`IккиʷɶDZÛbspKTSYeedzhwypu{      <25,2ud0p_$|h%j$ye>z=x;u'n(s)t*y-KKMOO100/.--HHFEDE+{*w(r(sB@~B90(8//(/(1)801*5-5.1)!G=:2!F=-`R!_P%jZD}ӼԼӹ̷ɵǭ¿]nkLUTXadguhzlo|      >5F;SF6}j0o_5{h#ye!p^"ta?|BDIMN000//../MPQRSTŧUƨUŧTĦ44SRQQO0.,}DBBA>{7lhW m[ n\iW^OPBC8D:F< K@!MBH=<3+$  "& %                  /*#?8"<51,1,50+&.)/*/*">7)MD'H?2-*&/*+&"&")%        I>aQaQ%|hHKIINRROJECDGJMMHA&j&k)s+x)r#ta0n^3ud:r:shWUGYKeU+cU#OD%THZKNAB8F<G<90=3A8=42*,%91*$;2/(;2*$=43+=3&RGTGYL5qa/sӾӼй˸ɴƩ]mjMWVV~vuedxgxm|{        .'M@OBWH1ra-iY/n]#ye'n(s+|/0MJHIJLORUŨVɪVȪTĦRR58:ťZѱ8^ۺa`߾a9¢2GA(pSE //.HHE>|${f#zf"ubkY\MM@F;I=$UH%VI!NB=3'!  '!)#("#                 $!4/2-$!"+'2,,'%!.)>6%C<2,*&6071&#+&811,)%92'I@1+%"         C9C8L@"s`CDAAFGEB?}@EJMMJC>5A7D97.;3A7 $7.3+2*6.1*1)913, D;D:)WK/eW3l]G׾һ̸ʳţz[jhMWVWrlkqgeyhz}uz        <38/L?OBOB-iY3xf8o?}GK.,}+y+|.0356532SX̭\ֶ\׷8ZѲ^ۺ<˪[յ=Ѯ9Xͮ:ŤQJ04yg0n^bR 8eZ/PH*w(s)uB>|(q&m#yejYYKRDTFXI)_Q$SG<3" "-&.'*$"               )&*&1,,(,'4.3-<4+RH-VK$C;)&&#+&:3A992%E=-WL'JA'#$!&"           0)D95,@5cS7m6~k4we7~k4.'D:/),%=41*7/1*1*5-6/91(VJ*ZM \N%k[AvԼι˳ŝsYigMUTZxqpuefwkyot      B8?6.'I=TGVHkY@FFDDDGKPSSQNM3677X̭\յ<ͫ9\ֶ=άZѱ;ɨX̭8S/K7l(o%TGSF *# *VL*VL0SK1UL4\R.PG*y&l6}j(s&k!s`cS[L]NaQ+fV$UH90  /(3,1**$                712-504."&"5/7092(LC,UJ'JA2-)%/*.) 81G>"=64/%E=$D<$!%!        
H=QC@5PC3uc6}j3we4we;sBFHIIJHD@~=wB8)WK1gY-n־к̱Øn}YgeNVUMYWkmshtkzo}x}½       *$I> )" -%SEkY"ub%j*w+{DBBGM4432479ģ\յX̭Uƨ8:Ǧ]׷9¢9ã]ط6[Դ4Tæ/I*v;t jY.iY8/D:,[Q,]R'OF$D=/QH1VM.PG,JC+JB!r_%~i!q^dS]N`QdS]N#SF4,!,%808/2*%             0+($-(5/0+&""1+?7>64/4/#@8#@92--(92G>C;92B:*QG"=6*&        H<SESE!n\=w=x;s>yEIJHDA?|>z=y>z@CB=y7m!n\"r_%{g&j#ubcS,eV1p_5zh/m\K?I=ZK^O$SGF<"NBF;;2E;D:>58/,%8091,%A8&!7/(";2?6=4#KA(TH$hXAwҼΰ‘izxYgeNVUPfb^busivm}sw         8/:1 ,%B7jX'n)t&m#xe$zf(r-233102VȪYϰXͮ569¢[ԴYί9:ƥVɫ8ZҳO6J-F$zf;tWI1ra +$"MA(ME'MD)TK'PG&LC&LC+IB)F>-ME-NF'A:gV]NXJ\M_OXJ K?/(+$;2=48/,&              '#&"2,4.5/A9!OD+RH!;5'$*&1,-)'#92"SH$[N!OE)NE+SI%F=       90F;:r?}=yzBDC@~:q4xf3vd7~k%~i&j"s`dS^OfU3ve/l\'YK%UIVHWINB#PE"NCD:>4G<A87/E;.'*$C:)#7..(-&+%80<3=591>5SG8"B;"B;,gXUGPCRDUGMA?6/'8/B8=43+!           .*,(71:3B:-VK.XM&G?0+*&5/4./)6/!OE%\O!PE$C;&G?'I@5/)%5.+'"          (!5,(]O4yg6~k3vd3ve8n:r9o5zh3ud5{i;uADD@;t6}k4xf5zh8n;s$wd!m[fUfU iXfV+cU([M*_Q+cTQDI=OB'XKB9=4J>D9.'?5>5 )"8/7//(,%2+&!80:20)C9&PE,_R5p`ҮdurVdbOWVPa^]`etukzn~|        $  <3.jZ"ub"ub n\ lZ"ua(rKQPLJNUƨ7757\׷\յ52VȪ[Դ51R4EK.:q)s6}j^OeU$RE907/  .`T,YO'JB&IA'NE'NE&NE#E=!<6"B;%MD#F>*cT&YLH<G;H<?56.5-C9@77.&            71)%!3->6;4!:3$A:'JA&E= 8250A9!ND LBB:G>!NC&G>-(-)$B:&F>6/>6#WK!QF4.#A9'JA         & #2*&YK-hX,eV*`Q-hX3vd6}j6}j"r_$xe'n*uC@~;s6|j5yg6~k9q;s8o3vd/m]gV!o]"q^dTRE&WJ,eV.j['YLD9K?UG!K@<3!J?G<6-3+;26.5-/';2 %0(*$8/0)<3@7B8J?'ra֭¿|dsqWecOWVUhdhofewjzq{v          $ *#)`Q9q;u6~k2sb4xf=xFJIGKS6632SS¥34XͮVȪ11UŧP*vK.%}h@~#wc1p_ kZ&VI&WJ=3*$,% 4YP4YP(NE*XM)TJ">8!<6&LD&OE#D= <6 ;5 <6'\NE9:18/4+4,=4B890*$         .)1,.)4.D;!ND)MC71+'3.!:3 8160 J@&`S'dV#TIF='I@#A9($ 82 LB JAI?&`R'fW(MC!<5(LC.)      ,%6-0(H>+bS-gX*aS,eV3uc9p&j&k&l(p(q'm$ye!o]2tc5zh:q=xF<90;10)2+D:+$4-3, !4,/(3+91<3%OD,^Q"bSܬyaroVcaOXWUhdklohuxm|t~        ("4,#PD/m]5{h6}j8n=yEHEAA-120LQVɫ632ON3TæL*wLI%}hA+bS8/&!.' 8`V7`U7_U1SK'JB&IA%F?&LC'PG"A:50"@:%NE#E=,fW K? /' ,% -&4,@780+%        !.)4.:3 I@/\Q.XM$B;.).) 
93:3;3H?&^Q(eW#WK'H?&F>(LC%E=5/$C;$[N%]PF=I?#VJ$D<"/*-(        >4>4 J?,eV3uc2sb0o_5{h&k(q(p&k%{g$xd"s` l[gViX"s`{A6D9L?C97/?58/ '!A8;2 '!5-("-&5.-'<36.4-C9/dV媾t_omUa_OXWRda]ksfiwoxuy          *$!MA)_Q)`R(^P+eV4yg@~HKHF.354OQTæ3.,~NS51HF/F8"@9$G?$I@$G?!@9612uc!MA % & +$5-5-*$          # 2-8281$B:)NE+QG&G? 93!:4F="QF!OEI?!MC"SH*PF#?8$A9-WM4fY2cV$YM(hY(gXD;%"4/3-&"         ":0A8'YL3uc5{i3vd4xf$ye&l&j#ub kZgVgVhWiX!n\#vc&~i&j$wd3vd.jZ.iY1ra4yg3vd/k[ZLSENA'YL*`R%UH@6?5PCF;4,<3>4.'4,<31*(")#5.'!80.'+%5-"H>2l\q`pmVb`PXWVql_cspkj{p|w~          !MA/m\.kZ(]O(\N/l\9q@~@?|)s-10-EHJ-.QUŨ3-~EJ.)t;u(q*v3ud]N$ye0n]E9*_QA67.<4 5YP6\R8cX3WN-JC/PG.ME*E?&KC&NE"?950!?8$I@7m J? ' )#0)&!         3-D< J@&E=6160">7#?8!<5$C;#TH(fW(fW#TID<C;&F>!<5"=6/\P8/91;23+:10(0)6. ;2)#4,(#,&.(<3NBo}]mkVa_PXWTkf_adutmxr|v~}            2+'ZL,fV,fW/m\6}k+IA*F?*G?,LD*G@9qA7  &"         :3!ND$XL-UK#?81,5/"<5#@8A9#TH(gX)iZ$YMF='I@+RH,UJ+RH1`T3fY8qb'JAF='cU$ !          5-"OC%VII=K?XJbRbR^O`P kY#vc%|h%zf"r`iXdSdShW!n\"s`"r`1ra+dU)^P,eV0o_/m\SFH=M@VHRD!J? H>#OCC8<2>480/)>47. "80% )#2+0) % &!;2#J@&QFk}z]ljT_]PYXUfcdjfgvloyv~x|             -&:2G=%WJ0p_7n2* (!          5/F=.XM-VK&F>!:3#?8)MD!PE!ND!OD#VK$YM"RG(JB*OF4fY;xh;xh7m_'cV&aS!OE:3G>(eW;xh7o`>~mF~+p`J@$[N KA.)        ' !:2 J?A7?5L?\MeTgViW"q_%{g&~i$xd kY]NYJ_OhW"r_#ub"p^eU*aR,fW/l\-fW([M&WJSFSEI=B8$RF&VID:6-A7<3-&;2C9 '! 
("*$+% % 7/ 1)/(7/I>&m]혬k|y^ljT_^PYX^{ukppflup{ty~}           2*?5:14,@6(]O5{h;t:r#xd&m+z-C@CF+z(p'oEJ/-B;t%~i+xA2tc!n\;s3vdVH(\O.jZZK904,J>0) &    .ZP-WN,WN)NF0QI/MF1SK4[Q.NF(@:+G@,KD*E>)E>+JB1ra-&      92"<5%C;&F=#@9!<5'G?$YM(gX(dV#TIH?H?)ND(KB,UJ9qcBvBv9sd$YM$XL#VJ KA!PE?oICw9td>n*o_E<;4 LB#"      ,%-'#3,!K@H=D9K?^O lZ!p]!o]"q^#vc#wc!p]cSYJXJaQ lZ"r_"p^hW`P^OaQdS-hY([N%UH([N,eVWIC8;2K?)]O!K@80;1>42*80;21*-& 1*+$ #)$ !,&/( C:ZL퐣hyw[jhT^]QYXXvp`bqujzys|xzz|         @6$SF J??5D:&XK-iZ.l[\M^O!o\%~i%j8o8n=xA(r(rGLJ)s%}i&>8)E>&YL1)       *PF#?8 93!;5"<5!<5C;%[O*l]*k\%ZN I@I@.XM1_S3eX9sd?p?o(fX#UI%\P+p`.{i.yhDyHAt.XM+RH'cU$XL=5#WK/]Q"     0)'!/(E:REOBNA]N lZ!p^ kZfUfV1ra0p_.jZ,dU_P iX"p^!o]gV\MYJ`Q jY kZ_P$SG"MA([M.iY+dU"LA:1F;QDI=A8>5:23+=391'!7/ & 7/  $$ 0*5-&PE툛gwu[jhT]\QZYUkg]|bhswm|q~yxz            90#RF#RE"OC'ZL/l\2tc/m]`P!q_$|g$zf7m;uC,},{,|IF%}i"q_&kBA&l6}j0p_#vb#vc-gWXJeU*aRH<H=#PDI= (!=3 *#  /]R.YO-YO*PH*PH(MD%E>(PG4\R-LD(@:*F?,IB+IA,KCB8        -VK%E=">7%D<(KB*NE KA#UI&`R&`R#VJ LB.YN7o`>n?o9sd6k]'bT&_R%\O)i[2s6J?oH<J>C9B8 J?7/.(>4>4 "+%3, #)#% !  7.6/ C:킕gwt\igT]\QZYaykdeiylq|uxvy            .'A7F;"NC+dU5{i8n4ygiX"ta'o)s'o>zCH,~)u(q?{:r"r_&lC@~#wc"r`6}k7l"q^`P)^P iXiX J?A7OB&WJB8 (#<2    9_U8^T)IB+RI+TK*RI,XN*SI'LD&G@#@9$D=(QH'NE!=7 93:0       'JA#?7"<5(KB1`T'cU&aS$WK"QF!OE!MC+SH1`T=}lE{Dy:ue1_S%ZN(gX,tc0m4z6H;)hZ9rcAt,(6/      4,@6:190E:&VI%UH!MA!LA&XK-gW1q`3ud4xf5{i4yg1p_+cT'YL(\NaQ kZ jY^NQDNBUGWI&WJ"OC$QE'XKI=<3?5I=A7.':2>4+% % )#,&.',&("'"2+F;}durZhfT]\RZYc|jmrjw{puyyw             0)4,4,@7'[N2sb4yg2sb kY${g(s(s=x=xAA&j"ta$xdOB"MB<3 )"802+"  7YQ8^U8]T:cY,VM)ME*RI(KC&F?)RI)TJ&IA$C<#A:#A:@6          #@97150'I@&^Q*n^*l]%\P!OE!OD/[P2bV7m_=}lBvAr:ue&`S)iZ0o5}5}0o/|jBvBuCvMYZ5|0o4y-ve&G?8pb6k]     1)1) ,%80 I>#OD!LA!K@&XK.jZ3ud4xf3ud1q`0n^.k[,eV+bS,eV/l\0n^-iYWISFXJ]NUHH< H>$RF)^P%UHC9<2D9=37/<3:1)#/( '!!80   &! 
"PDzeurZhfT]\R[ZYtn^~pusmy~sx|            1):07//(7/"NB)`Q*aR'[N m[#wd"r`2tb4yg9p9p!p^"ta>zA>z$zf#vb2tc0p_$ye'n5{h*aRXJ-hX2tbcSI="NCSE@6@6=390A8  ("$8\S9aW5XO2PI4XO+SJ,YO+VL(NE(OG&JB#?9%F?(SJ'OF           'H?&E=$B;F=%ZN(gX(eW$XL!OE$XL9rc?o@q;xh8pb8pa)j[+p`0m685{-ue9qbEzRWTTS5{.zi6B?G~XNA971       .'4,/'7/ I>&WJ'YL&XK)_P/m\3uc2ra.iY*aS*aS,fV.jZ0n]1p_0o^-hX)^P&XK([M]NaQWII=B7H='YK%TH!K@ I>@64,4,A7=4#.(2* +%#,& '" #I>vbrpZfeT]\R[Z]zbefwmp}uy          =3F;<280@6#QE&YL#RF!L@PCbRhWaQ-hX3we9q${g$zf'oB=x3udhWjX2tc7m&l#ub+bT+cTeU.k[,eVTFL@+bSL@-&<3 I>B7?6  )#   0XO/VM9`V8^U8^T;g\4XO2RJ3VM0PH(NE)SJ(OF&JB&JA$D=           *OE/[O2aU%[O%\O%[O#VK!OE!MC3eX?pF~E{@=AEŧ=>}mOQ1,-WL     %  4,A7D:C9#QE+bT-hX,eV+cT-iY/l\-gW'[M$RF&WJ+cU0o^3vd3uc0n]+cU([M&XK(\N,dU,eVUGMAJ>K?I=D9"LA%TG"MA4,-&B7@6("$1* ("*#   " #&!uaroYgeT]\Ua_fjdiwyp~y|          8/D9?4B8#QD*bS+dU&XK$UH\M lZ n\hW4xf;u?}'m%~i&k9q0p__O jY#ta8n8o"q__P+dU lZhW'YL%VIRDYJ+bS=3 +$E; H><280  !'!    4f[2cX/XO8_U4VM4WO3VM6\R9eZ4YP0QI2UL/OG,IB/PH           (JA3dW)j[)j['aT$YM#UJ#TI0^R7m_@qEzBv$SF"NB%UH,eV/l\-gX)_P)]P*`RUGM@G<MA\MhW lZ1q`-gW*`R)^P*`R+bS*`R'ZL%UH&WJTGTFG;<2A6&VI%SG:21*:1=3-&,&(#$1) "  qapnYecT]\S\[bxisij|{uz|         *#2*1)7.%VI.k[.l[*aR+dU!n\$zf#wc!p^8o>z~nNBREQDOBTF_PgVfU]MTFTF/k[/k[)_Q#PD!K@%UH*aR*aRJ>=3@6J>H<?5<3C980 (!0(.')#)#    !E~n7m_8pa?o3v775|4x5z9?^ȭYUV^ƫfڼgܽcҵfٻe׹dӶfغnnLյq     -&>5=480A8"OCK?E:C8K?VH\M]NaRcR\MQDK>PC^OhWfUWI"MB J?%TH)_Q(\N#OC I>H=K??43*A8"NB<3$ )"3+ %$   m~|`nkXdbU^]Xlhabgwxrrw            "80F<C9?6"OCYJZLPBMA*bS-gX)`QWIhW#vb7m6~k#vb l[*bS*`RgVhW-gW,fWTGRE.k[+bT>4?5"MBL@QD=4#.'3,5-0(    =bY:^U>g]9^U1^T2eY/]S0`U.\R,WN,VM)OF*RI-_T            (fW)hZ(eW%\O"RG"PE4gZ@rF~Ez>}m:sd?o4x873u1p5|<@@WU]Ūfٻe׹^ƫ_ɭlokiJҲLյo      ("-''"-&8/B8C8B8L@dTeUaQ_O\MVHMAI=PC_OgVbRTGJ>K?'ZM(\N#QEF}l4G=([M)]O H>1)6-@6:15-4,)# (" )"   izw]liXb`U^]Zokcormp~z            ! 
,$ % %7.H<"NBD:G='ZL\MXITF`P4wf3ud.jZdT jYbR(]O+cTeTgV2uc5zh^OC8#RE+cT_O`Q)^PD:A7&VI I?>4=3:1 I>>4"5-)#  5`W4_U}mG~7875{6~;AA==_ɭe׹fغbгhܾmIбIбLظLֶnOٹ         /(,% )"3+F:NBM@M@UG\MZK&XK$RF&VI)_Q,dU,fW,fV[LVHNAH=L@XJ_OWIF;;1A7&VI%UI H=@7F;>40),%6.4-! ! izw^ljW`_U^]\xscdvyqt             ! " ,$C8SE&XK#QE'YL.l[1q`_P]MgV2uc,eV&WJUGXJPC&WJ/l\!o] m[2sb.k[L@F:*aS.iZVHSE$SG"MBSF$SG7/5-;1E:$QE6-#  0)  I|o8i^9oc6j^6k_3cX=h]=h]8]Tnb=l`              'bU"PE JA%[N?nF}F}@qE:J>L@D9@7C9#OC!J?6. ("/(/(*$(#  fwt\jhW`_U^]\olkilyu             & ?4M@I=!MA'ZL-iY-hXTFSEZK(^P"NC K@PCVHTF,fW4yg!n\`P+bT)_QOCXJ/l\(]OD9K?&WJ'ZMUHE:1*;1>4F;F< *#(#4, # "!    EqfBkaCod6g\5g\8re6m`6nb3g\2dY1cX             (fX&_R&^R*k\BtDyAr4H=F;A7=4D: J? J? J?%TG,dU0n]/l\*bS'YL&WJ'YL([M([M'YK%THJ>I=MASEOBB77.;1"LA!K@>55.-& # #/'%  !fwt]igW`_Va_e|pplq~                 !5->49/>4!LA&XK$RFB7E:NA$TG!LA%UH^NbR`P0o_1raZKL@(\O+dU\MbR+cT I>@6QD([M&WJK?=4@7H<;1915- %:1A8 )" $   J}pLuFuiEthBncCsg4fZ4h\6na4h\               &_R)hZ,tcCxF|Bv>}l=CFȪEħDIбKյIͮKյMڹMطqN׷       !2*4+-&2*C9F<?5:1C9"NC$SG$SG&XK*aS,fW*aR%VI#QE&XK+bS-gX+bT%UI!K@ I>"OC&WJSFOBE:>4>4>4?6>5B8=4$  (" '!    \hgW`_Yjha}es{pv            ("   %4+6-.'5-E;"NBG<=3H<TG(\O'[M-iY lZgV]M+dU(]PG<K?-gW.k[\MZK%UIG=OBWH!MA I>C8E;%UIK>.'.(,& *#E;=4 % %    =rf>xk;sgJrJrGzmIrG}oEzm                $YM*l\/~lF}Dy@r@rE{LQ75z6};@A>XI.jZ)_QNAQD%VI&XKZLK?8/C9E:$QE'ZM?5 *#5-1*0(>5)#  (! '! (" M~rNuHwk?|n;sf;sg9nb;wiIr                (eW-wfDyCw>}l%UI&WJA73+3+904,,&/(5-   !  mst            "*$$(";2A7<25-@5%VI&WJ"NBMA]N`P)_Q)^P,eVXJK?L@*`R*aSVHZK'ZM I>G;UG*`R+bTTG:180!LAH=#OC"MA4,5-F<8/.',%  ,%)$5. ("  G}DwSQ}RMwMx;tg                 -wf2sGBt;wh=AFǩEħCHίKյḪKҳMٸnqNظ        *#2*-'6. J?$SG$RF#QEREXJTFI>E9I=NBPCPCSFWISFJ>C9J>XJ]N'YLE;=4G=%TH$RFD:>5?5?5,%  )#=4("                   1*7.1).&<2"OC K@C9!LAQDNAD9"OC&YLPCMAZK0o^-gWREL@ I>B8OB]N)_Q'ZLI=90!K@%UI>4@6@66-F;"MB4,*$!  
)"+%'"/)    XH}G}GE{E~V                0~lGF}CvBtF}NRN6}9>A?<@EħEĦCGɫK׶JҲKӴMظopOںQظ       ,%7/2*3+C9"NCE:?5A7I=I=C8B7L?WI\MYKTFPCK?D9B7K?YJ[LPCA7?5#PD%THG=80=4#OC#PD0)  $1* "                        #  ,%=46.0)<3B7=390!LA'[NUGXJeT/m]'YLF;H< J?"OCZKVH!LA"NCH=H<([M"NB0(:1?6>4J>A7.(/')#)#3+ "      LNK^\]Z                 0mBt@pBtHPTPJ6}I=H<L@REQDI=B8I=TFQD>40)90"MB#OC90*$-&.'$                    ("/'+%+%<3@6=3?5'YL,eVXJXJ\M'ZLH>H<RE&XK(\NTGA7?6#QEOBOB&VI@72*F;D:<2<2.(2+;17/;32*     sƱZYXUh                  F|>~n;vfAsLSSN6~;@?<>D¥FƩCFĦJҲIίI̮LطmpOٹq        ("+%# #/'3*,% *$5-B7F;G<OB*`R+bT'YL"OC"NC%VI(\N(\N&WJK?F;A7B7L@VHREA63+8/ J?C90)/)A7=4                & $ #4+7.5-;2!MBM@H<M@+bT*`R#REI=K? J? I>VH]N(\N&WJG<8/G='YLJ>F;!K@@6D9%SG>51)-&(#>5A7:2<3)"                       F}AsArHQRNM:@A=&WJ$SF#PD$SG$TG"MBGB7?5E:F;7. $+$B9A8"                ,&914-/(7.E:E:E;"NC(]OTGK?NA'YL!MA>5>4F;!K@%VI]NWI#PD#PDG;C8&WJ$RE7/<2 I>!K@RE#OC3,/(-&2+E;6--'3, # "3+%  !                     CvCwIQTPLP>A>=BEħCCIͯJбI̮oppNٹOعr       .' *# '!2*@6D9A7!LA'XK([N$RFH=F< I>H=F<"NB)]O,eV*_Q$RF!K@#PD&VI&VI#OD H>E;E;E:L@I=:1 *# )"5-5-  !              4-?6:180A6L?H<H>$RF'[ML?A7E9!MBA8=4G;MA"OC%VIRDD9G<%UIQDOB%VI?6.'@5"NC#PDL??66.=48/;3;2 #/( %2*<3'! #                     ?oEzNTRNPX@> I>&WJ+bS*_Q#ODC9B8F=38/8/6. '! %!                )"4,.'0(:1B7;17.H=!MAC9:0D9"OC I>"OCUGNAF< H>A6:0"NB*aRTFK? J?;2;1K? I>B9=391G=K?:191-' ,&2+ ("4,-&                         DxKQRMNW]Ū@<>E¥EĦBGǩJҳHˬkompOۺr        *#4,/(/(<2!K@F<?6C9!K? I?D:F<$SGUGUGOBK?I=C8>4C8RE[M)]O"MAE;!K@$RF"MA?6:2?6F< H=<2,%                     $ !2*2* +$ ,%@6E;?5?4NB'ZM%UH'ZMSFA6;2C9?5@5'YL(]OD9A7!J? H=MAK?8/8090@7$RFD91*910)*$?62+ # %   ("  ("                        KSTPLR\]ĩ=>CDBEJѲIͯkonqNظq         )" & ,&807//(3,@7 H=C8F;SE\MXJMAE:E:E:D9G<PCSFK?@6B8PCVH$QE:24->5F<B8;26.,% !                         .' 
,% &-&D:H=E:J>WISF#QE#RFE:9/@6"OCJ>H<%THG<5,C8%TG$SGNB>43,B8?5B8F<0)/(C9>5>4D:'"     !(#                        OUSPRY]éWVBEæCCHʬiݿh۽nnoNֶOٹq          +%0),%.'9/E:H=I=OBUGQDF;B7H=PCREOBM@I=A7<2D9TFWII=8/90 I? H=5-*$3+:23+                      #  ("802* -%8/"NB!LAF<I=MAA7>5E;@6>4$RF)_QM@D9G<918/L@$RF!J?E9:1E;%SG<37/5- (!:1"LAA7>47/" '! ! ,& #                        RRNPX]ĩZU\eշbϲ`ɭgڼjfոkmnMնOںOӵ     $ -'91803+>4J>K?D9B7F;E:@6A6MAZKRDI=E:C8A7G;OBMAA7:1D9QDH=-&$1*91+%                      ! ,% '!'!6.C9=43+>4F:A890;1:1-&5-F<E:H<(]O([MH=>4G<A7E:J>>5=4B7C8&WJ$QE1)6.7/2*C8?6.&4,-&.(90.'2+7/                         UPMS[[WZcгcѴ^ƫf׺kh۽knmOںOںq      /(0)-&80H=D9?5B8J>H<<35-:1@5G< J?&VJ+bS*_Q$RF J?"OC&VI&WJ$SG"NBA7:1:1I=REH<2*-&907/                      " +$ % & 91@77./'90:01*.'802* ,%=4"OCG;F:$RF I>?5F:M@"NBG;;20)A7H<F;$QE<23+ H=E;;2:1'! & 1)/(?6=3,%4--'                             TPRZ\UV`ɭcѴ_Ǭbβji޿jomN׷Nַqr      "5-<37/<3D9H<@6=3B8C8;1:1A8"MB$RE#PD$RF&VJ$RFG=F<%TG*aS*aS%TH I?F<E;F;!LA"NB>44,:1J>A6                 " $  !3+6.,% )#4,4,/(5-C9<28/ K?"NC906.<3=4E:REPC I>@66.;2#PDD990A86.C9&VJC:8/0)'!4,6.2+<3(" *$ ("%  %                           OPX]ŪYU\bϲ_Ȭ_ȬgڼiܾfֹlmnMҳNַPն         +$6.1*+$6.=37.2*8/F<F<D:"MA'ZM)]O%TH!K@ J? H>D:G<&VJ+bS)]O"NB H=#OD&VI$RF H=@7;280D:K?@6 #                 "  0)1))#1*:1:0;2G=$RFC8=3 I?A70)1);2A7"NBPC@6>5B8B7#PD$QE3+0)@7=3K?!J?7/;18/?6E:1)*$*$ (# &!2*% '"*$                            MRZZV[cҵbβ]èeԷiݿf׹jlkNظOٹq      #  "-&-& )#6.F<"NC"OC$SG)]O)]O#QEF;G<"MBI=I=NAPCI=>4G<'XK+bT'XKF<@7B8B8B9C97/% !2* )#                  (" ! $1)3,/(<3C8@6=3"OC#PD:17.B8:14,?5F;F;H==3/'B8L@I>#QEE;0(>4 I?;1=34-:1H=!J?"MB>4 %,&)# .'0)% 0))"                         QXYTV`ʯdҵ_ȭcѴjhڼiܾnlN׸N׸rr      '!2+0)7/ H>$SG#PD!LA#QE$RFE9?5F;SEWIQDJ>G;B7=3C8SFWIJ>=3B8%UH%TGD:7/-'#%4,(#          +%%  *#5,4,0)?5@67.3+A7<3 ,%3+=3A7G<OBH==4<32*0)"MBJ>=4F<C9A7PCC:0)3+3,!J?J>@7C92*-&B8:25-=4.' 
%)#                           W]èYU[`˯_ƫ`ɭiݾjhڼmmNظMնoQع        "  3,?6=4<3F<"MBF<6-8/@6B7C8M@YK[LPCE:E:I=I>K?NBJ>>4:0I=YJSF?54,913+% !            $  '!.'("& 2+.' ! "0)-' '!8/E: J?#OCL@<27/A8;1>4!LA:20(F7/8/?5B8!K@4+0*>55,B7#OD<4;191$(" !                           Z[VZaͱ`ɮ\cϳh۽f׹kmJͯMԵNַp       !1)1).&<3 H>F<=4>5>490/(2*?5H<I=L@SEQDG<B7K?WIXJPCH=C9>4>4L@UGJ>90:1H=D9 )"              # ("$ 1).'2+A6D9?6C9=33+B8$RFE:@5?6.'7.#OC H=C8<2?5NBH<A8=4 ("<3 G<90C8;2*$4,/(,&5,)#'"4- $ +%#                              ZUU`ɮcѴ_ƫbͱgٻeӶfֹlJ̮MԵMҳrPҴ        '!  !2*6-3,6.A8A87/1)90=390<2I=QDL@D9 I?!K@G= I>&XK,eV+cT$SG!K@#PD%UH%TG%SGI=>43+?5QDF; ("            ! !/(*$ "0(?67/91=33+*$4-5-6.#PD%UH<3:1?6<3H< H=4,90<2#PDPD;2A7>4<2$QE=4-&5-% -&905-A7>4,&7/80  $.( &  " !                          YU[bβ`ȭ`ȭh۽iݿfֹkJΰMַN׷pP׷      ,&& #0)8//(/'<2G<I=I=PC([N$QEB9A8 J?#OC#QE'XK)]O&VI I?!K@)]O-fW)^P#PD!K@ H=B8 I>#NC6.  "4+ )#         ! ! +$ (" #3+>5801)>4?55-5-3+ %'!:190:1!K@?60(?5!LA"MBH=7/4-B7?5!J?<36-%SGMAI= H=,&0)803,C8>46.>5'! )#                          UYa˯a̰]§dѵigٻknKбMնNֶpr       #/'2*2*?5M@NB#OC"OC$RF"MBE; I>&XK*_Q'YL$QE#OC!K@ G=#PD+cT-gX'XK!L@$SF(\N&WJ!J??6.( )#>5 ("     '! #+%7/-% )"6-A75.1):14++%2+3+,%:1 J?;12*80-&2*I="MBG=?5<3"NBH=7/;35-F;)]OB7A6@790L@!I> G=D:(")#'"                         S\a̰^Ū`ʮhڼf׹g׹mJͯMԵKͯoPַ       "  "4,;16.:1E:!K@@6=4G<"NB#OC%UI+bT,eV'XKD9E:L@NBQDUGRDF;D9)^P/j[)]PE;?6>53,2+3,       %-&   *#.'! #-% ' (":1=3:1!J?!K@1),%5-1*=3D96.<4B7#NC'XK;26. G=A7K?C93+G<#MBQDPDA7 F<4,& 80*$ #7/+% ,%& ! $                       Y`ɭ^ĩ\eӶhټeӷiݿIɫKίMӴppq      )#6.-&,%90?56.4,;2=46.5-D:%TH&WKOCSFSFK?E:OB]N]NSEL?J>H<L@ZL\MI=:1E:'YL"MA6.-&          ' #.'D:?58/D:=4 +$7.F;?6=31))#D:H7.$QE'XK?5D;6.E:QD!H>H=;2@7$OD>4#LA%RF8/>4 E<0*1*:1.',&;3(# #"                      `˯bβ]¨aʯf׺eӷhٻKѲJ˭MӴorOѳ       & ,&% .(=45-.&3+:190/)5.@7B8A8"MBREOBD9B7H<J>M@YJcS^ONBJ?TG[LYJREK??5<3OCYKC8,%/(-&  #    '!-& & 2+>5-& & 0(3+6.G5#OC&VIK?K?OBI=<3;2I=SFUGVHXJSF%SG([M0n^2sb*aRH=I=L@J>L@H<3+ (">4I=4,-&+$    !   ! 
+$ $'"8/2* $,%-&  -&4,>5D9914,=3>4#OC H= ("?6!K@E:!K@C9WI)]P"LAB7;3'WKTG%QE%RF2*5-?6/(8/;20)91:1% % +%                    B@EI̮HǩJίKϰJ˭MӴnoq         " #5-B9C9G<&VISFK?D9G<I=F:I=WI-gX)]O#PD$QE&WJ(\N-fW2q`/j[&WJ$RF,cU/k\'YLD:B88/6.I>J> ("   $"  +$)#$.'4+ )".'80 )#  %(" *#8/4-4,.'$80G<C8!J?>590%TH>56.F<'WJZKF<$QESF+`R.gX?5D:?66-&SG!I?B8RE!H>C:I>7.,&80,&                    ABFæFæGŨKбIȫKϰj۽oP׸p     $ '!("8/ H=G<A7A7F;@6:1D9RD)^P*`Q,eV,eV'YL"NB&WJ-gX/k\,dU)^P(ZM'XK*`R1p_/k\$RFG<'YL+aR#OCF;=4  "     $  *$ & "+%! -&4-2+>4<3*$)# % $C9>40)C9 G=NB"LA4-H<&VII>@6#MBdTWJ*_QK?I=-eWNBUH(YL@7F;A70)80,& !%!                    AFGƩEHƩIǪKͯnnNӴr        $  !2*=45-80C9@6/'/(:0=3<3#OD+bS,eV)\O'YL)]O)\O)]O/j[4we0m]&WJ$RF*_Q.hX,dU*`R([N%UH)]O2ra0m]"LAA8!K@?680#NC6.     "  #7.;290<30(% 4,3+3+?6 & )#!J?C8D9A8J>]NF<C8K?,cUREI>/iZ]O!fV*]PF;OBD;!I>LA6.>6>5'!$0*   !                   CGǪGŨHƩJΰIƩmnqNϱr        &!)#4,0)+$;2B8802*:28/1)1*A6L?$RF%TH)\O'ZM"MB!LA(\N.jZdTfUeU\MSEZL jYfUNBG<VHaQ`P_P+`R$QE([N4ud0m]&VI)[NE;    $  & 3**$&!,% $.'G<<24,6. '!>4 I>,%;1#NCZKOB#MB_P$QE%SGMA4ud0l\QD+`R'VJ eU2p`.gX"iXUG%RF+]PD:;2%OD!F</(5-6.$                    FæHǩF¥KϰJͯmmmNѳqҿѾоппѿ     "  ,%-& #,%805-.'4,A7C8D9QD+bT([N#PD#QE%SG#PDMA_O iXaQTFVH_OdT hW iX^OPDYJ"m\ iXPCNB`Q gW!lZ$tacSF4K?3,,%H=!J?I>L@1n^QD&UH3sb dT+aSK?#n\5wf#m\$p_)ZM+_QMAC9%PE@71*<3A8/)1*?5:2+&2+.'                   HʬGƩHǩJ˭hػnlnOӵrӿѾнϼμμμνϽϿ       .'6.0(90L@RDMA&WJ)]O&WJ"MA$RFYK_P`QbR_PSFK?YK!kZ iX[LZKbR iX!m["m\aQUG fV'~j$vc`P\M_PSF^O^O5,2*6.   % & ,&  )"!  
# *$ !-'&!2* H>E:?55.K?E:91PC)]O(ZMVH:p`Q]N5we$r`4tc+`R"kZ/hY5ud$o]ZL2n_1k\NBTG+]PB93+90,&"4-*$  $                   GƩF¥J̮gոiټj۽nOַmܿԿҽмλ̺̺˺˺˺˺̻ͼνϾ     !2+=3<2<3I>MA I>D:#NC%UH&UIXJ hW jY^OUGXJ\M^OfU2q`+cT&VI-fW8l7~k/l\0m]6|i:q*\O#KA!G=SFNB70/)2+  ,&,&                EG¥iڼhػmkNӴpQӵҾлͺ̹ʸɷȷȷȷȷɸɸʹ˻̼ξu     0)903+=4F;C88/<2G=A7>5"NB*_Q[LZK^O\MREREcS5zh3tc-gW,dV.hY0n^5xg6{i0n^.gX8mB>x6{i9oA:pbS eU^O`P$ubNB/(=3   /( *#,%;2'"*$)#1)8/#*$?5A7A7PC0l\>41o_ZL fV#PD#OC(ZM^NXJI=H<REXJ-gX2sb3ud.hX*`R1q`:p8m1p_1p_7}jx9o=wC=w5xf=vH;rVH"lZ`Q90?5    ,& " 5,7/7.1)'!A6.&/(J>\M'WJ gW/iY gV:o%vcH)mGG)~j.( #703-            i۽kiڽlkݿNҴqPϲؿҼϹ˷ɵƴŲñ±±²òóĴƶǷɹ˻Ϳum{y       -&*$ !.&;2A8E;'XK,dU)]OOBREOBG;L@.iY2ra-gX)^P+bT.iZ2ra8m9o4ve2ra&}i+v(n$vc?zH=v8mHH$LB?66/6.5.              iܾh׺llLΰM̮qԽк̷ɴŲð²ôŵǷɺ̾툛un~{jxv      ! *$A7#OD!K@%SG([MK?B7J>XJ^N.iY1o_/l\)\O'YL1o_8m6{ieU!kY%xe(n*s)q(l+u,y(n+vJHJOLZί;:ZR=^V7\T3KF5DA6A?5=<6/   #    & 5,?62*<3+bS:1.hYF;?y#n\E37OѬƠ¼¼ü/v)zg+l(xf'uc'sb \N+[O1iZ2l]OC804,@8:2"                  gոeдjܾK˭OԵoOΰͷȴű¯³ĵƸɼ퉝xrkywgus       .'-&-&C9 H=:16.=3C8D9REeU1p_+bT*^P-gX1o^4vd$ub#ta gV gV'k+w)o&zg+v/)o)p20-|8TWƩ;4eZ3]S4\S@e\:VO6JF4JE0653876<;        $&!    2* $"0)MA%RF\M$QE$tb5ve+s3ֱͪȥƜü.t&p_&n^TGUIMBE;YL-`S>5")$917/  #         fӶiڽiټLͯNѳoķɴŰ®õƹzskzxivuftr       -&% (":17. )#1*:24,1*@6SEUGRE*_Q)]P&VI*_Q5yg:q#q^ iX#p^&}i)p*s)q(n-z,y*t/0/77[ϰ^׷7se?k`BqeBpd9XP3WO6\T3MH2?=1973><4:98>=,&5.G=G<<34--&&!            gոmLϱNҳl۾Qӵ­ù튞|tm}{iwufqp     #5-,% ".'2*,%/(D:#OCH=RE\NTG!J?#NC+`R0l\2sb"p^"n\ gV"o])q,y)p&}i-z/)o,x40289<£8|l6tf9yk6na1ZQ6h]4\S>aX=\U4FB1C?4GC0553885;:9@??MJ    >50) F< #:1#OC!iX5wfK$dU!\NK@I?PD804,3,&!(#;42,          iڽLͯMѳlnO˯}up}kzxgvtepo         /'7/<3%THZLSEK?PC(ZM'XK-fW7|j8leT`Q"o]'k*r*s(nBGDDNLM[ϰ68>ɨL}ArfIu>i^F|n>g]Crf4]T1OI6\S4RK3C?2=;/442764987>==JG?NK'!      '" -&B8*#aQ6-/iY/'PEVJ @8 @8;4)$4.0*  !         
LбKɬLʭnQԶoٽwq~jxvhuserpcml       <3 H=B8 H=J?D9=4NA0n^4wf4vd5xf!m[!jY$vc+u,x&}i;sHH@{HPLSXȪ=Ʀ=Ȩ@tfEp5qc7vh9zk6ob5g\?i^=bYDqe<]U=]U1JE/<:3GC/650652875;:8EB8GE8IF #      $F;#OC fUEOD/bT0eW#G> B9   +%        K̮LͯlܿmPϲ~xql{yiwueqpdonblk       0(1)/(E;!K@91,%;2H=NB-gX5yg4vd-gWcS%zf*s*t(m(lEFA}GQKOZͯVŨY˭>Ȩ4ob7{k2fZ7vh6re4h\7se3cX9re4`V6cY3UM3SL>^V5GC2><.320541662875@>7CA8FC5DA3C@ "    L@(ZM#o].`SA8/)4-1*%!0)'"       IĨMϱoOαmؼ~vqm|zixugusdoncnl     ("% 2*E:F;7.=4 G=C9<3SE hX iX.gX.hY0m]3tc&|h+v)p$vc(mHEALPNYʬT[бaݽ,UL1eZ-XN2g[DzmHsAqdHs>i^Aod=d[Bmb?f\Cmc3RL3OI/? A9        L˭k۾mݿPгQɭ|vqn}{ivtfrqdpnbmkakj         0)-' %5,901*3+"MBXJ^O gV"o]2qa-eV2ra>x*t&{g"o]'l-|C?yKQJRY̮^ֶ]Դ)95)E?*KD/]S>laDzm>k`=i^Eyl4f[6j^7ma6g\7i^4\S=^V<\T9TM3A>1@=/=:.;8+75   $A8G;1o_۶Ȭ"ZM;392,'($        ol۾Mʭn{vsm|zjxvhusdpocomakj       0)(" ,%3,;3)]O!jYdTYK.iZ4ve9n=w>y%xe%vc*r-{*qEONQZͯTXɫc ##)E?+KD0\R;dZ4OI,004B?.33/440543<:3<;2=;2>;0<:0<:,75*4291    J?ί"YLE;:24-E;      kڽmPҴOȬ}xurm|zjxvgsrerpcmlalj`ki       #4, ("% 3,2*  & C8)\N*^P-eWbRVHMA0l\=v[T4C@2B?.32/43/542=;1=;0;9/<9-86,85*53(31&1/&!   5-ҲŪ92)$       hԸNˮPгSϳ}|wurl{yjwvgusepncnmalk`ki       -&(" *$6-80D9/iZ4ud,cUJ>QDdT#p^5yg2q`1o_9m*s*r)o/3LTXȫ^׷^յ&**%((#''"&&"%%'96-A=9\S.QJ1^T6VN=f\?j_Aod0WN7ma4`V7f[9k`7e[:i_6YQ6WO1D@5FB1:8.33/43/441870870981<9/:7.97*31)1/'/.%-+#+*     I=DLK۶Ȭ)SH      NϱOϱlֺ~{yvto}n}{jywhusfsqcnmbml`kj          2+E:G;%SG&VI#ODJ>eU&{h#q_,cU2q`CE%xe)q44OY˭Vħ:@Я)-,'++%))$((#''"&&"&%"%%'84':67WO9[S@la1XO5dZ7l`8la5bXHzm<]UFsg;WPCka5SL9^V6RL2A>.33/44/44097087/86.76,54+53)21(20'/-%+*#)( $$&!    
*_QAHHFMMͭ     OαOαQα~|{xwtr~o~|m{yivthusdpncnmalk`kj`ji        :1D9,% 7/&VJ`Q_P\M0m]:qA?{)p14296=Ʀ?ͬ086,32*//(--&**%))$((#''&/-*A<-IC5RK8XP-MF0VN3]SCpe;]UAj`DqfBj`FshFrgAf]Emc9OJBe\2=::PK/43/44054065065087097-75+53)0/'/-&-+$+*#*(!%% $#6/=DC@FFCJIѰ¨     MȬmھRα~}|{zzwvsrn}{lywixvgsrerpcnmblk`ki`ji           % /(2*@6>5/()##OD1o_YK<3PC>xC8m@}5438=Ʀ=ȧ8CA4?=1:8-32*/.(-,'++&*)%)($(($'''31)421GB.MG3]SAka>f\>e\Dqe:oc5_V;pd7bX8cZ9dZ9_WYSHqg;QL8HE3=;165165166065/76.76-53,43)/.'--&,+$*)$))!%%!$$ ##""!!90 =DC?ED@GFBIIEMLȩ     nQͱ~~}}}}||||{{{{zzzyxxvutsq}o~|m{ylzxivthtsfrpdonbmlakj`ji_ih           "MA(ZLE;F;"m\+u'~j;qNUæN\ҳaݽ`ںDTR1:8.43,11*..)--(,+'+*&**%*)+74(53/JD4MG@g]6aX7g\4[SErgGujBi`EodFpf9aX?rfED?EE@GFAHGCJJFNMʪ    mٽQʮ{{zzzzyzyyyxxxwwvuutsr~q}p~|o}{l{ylzxivuhvterpeqpbmlalk`ji_ih            0)*$ -&%TG@7% H='~j&zg%vcNVæRVçcGVS=KI8CA5><187/54-32+//*.-)-,(,+'++)0/*85,?;7QK4JE1RK/ID@f\BkaDoe7_V>wj;i_=pe@wk=kaC}p=e\Apf:WQ$QF$ub$ub#p^0VæTTcGVS?KI9EC5@>3;9/76-21,00*/.).-)-,(,,)10)101B>4JE2QJ7bYBj`Cmc5ZR5@6%RF9m'~j#q_/U¥PQdEVS@MJ:DB5=<3:9076.22,10+0/*/.*.-)--*21)/.5JE/E@2OH7NI9TM8bY:i_EmcDlbPxHsiDsDsD{nE|oDtiJyFvkPCh`Elc=OL6<<8FC4:93:9076.43,00*//(-,'++&*)%)($('#'&"&%"%%!%$!$$ $# $#)84$OD                AHGAHGBIHBJIDKJEMLJRQ   PȬoջq}|q~}q}|q}|q}{q}|q}{p|{p|{p|{p|zp|{o{yo{yo{znzxn{ymzxlyxkyxjwujxvhtsgusfrperpcombnlakj`ji_ih^hg              " %RF#o]_P'j606?˫DSQ>LJ;HE6?>3<;187.33-21,10+0/*//*..*0/*0/0D@/A>>]U?aY:g]7]UBf]Jym>nc>odDsCznFsT{X[aZUKzpXKul?PM9?>6;;398166/65-21,21*0/(,,'+*&*)%)($('#''#&&"&%"%%$-+!%$(52")('85                      BIHBIHBIICJIDLKEMLIQP   pmϵnzxnzxnzxnzxnzynzxnzxn{ynzymyxn{ymzxmyxmzylywlxwkywjvujwvhtshtrfrqepodpocmlbmlakj`ji_ih_ig^hg             90E;% 5.1o_ gW\M/*p9?έFVS=KI9CB5=<3<;2:8/43.32-21,10+00+//+//-652HC4EA:TN2GC9_VFmdKym=j`DrFwQvYRv_Zfiot¯=FD6<;4:9298/43-11+0/).-(,,'++&*)%)($(($''#'&#&&"&&"&%$+*"&&%0.(31                            CJICJICJICJICKJDLKFMLIQPQ[Z   
nؼV̱kwvkwulxwlywlwvkwvlxwkwvkwukwvkwvjvtjvujvuitsiuthsrgsqgsqepofrpdnmdonbmkalk`kj_jh_ih^hg^gf          2+80"LA:o"l[)o521<ĤEUR@OL9FC6?=4<;287054.33.22-21,11-32,000>;4EA9OJ:aY;e\HrhBd\BzmD}pM{pT|RxLMUTY_?IG7=<398165/43-21+//).-(,,'++&**%))%)($(($('%,+#''&0.%+*&/-):6*63                            CKJCKJCJJDLKDKJENMFMLHQPOYW   QƬgrqhsrhsritritsitsitsitsitsitsiuthsrhtrhusgsqgsrgsrfqperpdpndpncomblkbml`ji_ji_ih^hg^hf]gf           & 91ZK[L-z+t6=Ʀ@OL?LI:DB6?=5><298055/44.33.32-21-21-114LG9NI8XQ7UOA_XM{pBxlBtiRxWHuPPSYbĮ8@?398165.33,11+0/*..(-,',+'+*&*)%))%)($(($(($(($(($((&,+%)('/.                         DKJDKJDKJDKJENLDLKGPNGONLUT헬  Sɮdonepoepoepofqogrqfrqfqpfqpgsqfqoepofqpepndondpncnmcmlbmlblkalk`kj`ji_ih_ih^hg]gf]gf]fe             D:.' "LAMAWI+s%wd,v9J\YCRO=LI9CB5=<4<;276165054/44/43.435DA2@=498176/43-11+0/*/.)--(,,'++&+*&**&*)%))%))&+*%)))42&*)0@=                    DLKDLKDKJDMLDKJENLELLGONJUTO[Y푧    oջalkblkbmkcmlcmlcnmcnmcnmcnmcnmcnmcnlcmlcmlbmlblkalkamk`kj`kj`ki_ih_ih^hg]gf]gf\fe\fe        <3,%+%\MYK$tb.}-y.}IYVAOLMJ5<;4;:2872761661650555A?2;9:MI5FC:TOLukAmdCqgW]PRmzҼ?KI5<;287054-22,10+0/*/.).-)-,(,,(,+',+'++'++'++',+(,,(-,                ELKEMLDKJENMDLKEPNENMGRPGQOITRNZXR_]]jh킗|wus   U˰[ed\hf]hg]gf^hg^hg_ih_ih_ih_ih_ih_ih_ih_ih^hg^hg^hg^gf]gf]gf\fe\fe\fd\ed[ed[ed              ;3!<22*cS`Q$sa2K\YBPM=MJ=5>=2872771761661661661665><7IFEd\AYTG|p\[SZr~Ծ9BA387055.33-21,00+//*/.).-)--)-,(-,(,,(-,(-,)--).-                       FONENMEONEMLEPNENMEPOEOMGRQGQOJUTN[YP][Ta_\khbroj{xj|ym|k~{k~{m~{j~zj}z  Sg^ol_omZdc[ed\fe\fe]ge]gf^ig]gf]gf]hg]hf]ge]fe\fe\fe\fd[ed[ed[ed[dc[dcZdc             91'"("OBE:&zf"m["kZ+rAQN>MJ4<;5=;2872772762762762776A?@e]:QLPzpL|L|cdXb8>=398166/44-22,11+00+//*/.*..).-).-)--).-).-*..                         
ENMEONENMEPOEPNFQPEPNGSQFRPIUSGTRKXVKYWO\ZQ`]UdbXgeYig^nk]ol`qn_qn`ro  pӺS\[U^]V`_Xa`XbaYcbZcbZdc[dc[ec[ed[ed[ed[ed[ed[ec[dc[dcZdcZdcZdcZdcZcbZcb          " /(.'"kZ3sb&xe1GYUBRN4:94;:2872772772772874;:5=<9JGHiaYHxm\naBZU4:9277065.33-21,10+0/+//*/.*/.*..*..*..*/.                     EMLDNMDMLEOMCNLEPNFQODPNFRPFRPHTRHUSKWULZXP^[P^\TcaTcaWgdWhe                  qҹcZmifZkhb{vYecXa`Xb`YbaYbaYcbYcbYcbYcbZcbYcbYcbYcbYcbYcbYcbYcbYcb              " 2+  -& $*_Q)[NXJ,wGZW@PM=MJ4<;4=;2772772772873873987B@ErhRtOMym|ֿ;OK387165/43-22,11-22+0/,10+10*//,32*/.                           EPOEONFQODOMDPNEQOCOMERPEQOESQGSQGURHUSHWTKYVKZWO^[N]ZO`]                       U`Y}uQ[ZUa_T]\U^]V_^V_^W`_W`_Xa`Xa`Xa`Xa`XbaXbaXbaXbaXbaXbaXbaYba             ! 0) '! @6802q`4tc.hY&wdN`\@QN>LI9GE:FC5?=5><3<;2772872762772772877EB498@aZEqgTuEibXwƲ8?>4<;187176.22-32,10,10,10+//,21+//                           DNLEPNCNLCOMEPNCNLCQOEQODQOCQNFSQESQGUSFUSIWUJXUIYV                          Yʰ\g^YwqTa_VecZsoVa_ZifV_^V_^V`_W`_W`_W`_Wa_Wa`Wa`Xa`Xa`Xa`           !("(YL2p`5weB~DUQ=MJ:JG9DB6B@6@>3<;4=;2872:9176276277287398499Ckc=LIS}sYdư9A@4;:4>=198/97087.97/87-54/97,53                           FQODNLCNLDNMCLKBNLCNLCNLBOLCOMCQNBOMERPDQOGURGUR                              W©`lfVqkXsnQZYYkgS\[T]\T]\U^]U^]Wa`V_^V_^XdbWa_W`_W`_ZkhWa`              '"%  " % B8)ZM,aS4udASP=LI5@>2:93;:1651771651662762877A?7DBFtjQypMvV6>=5?=3;:0:8/76.97/97-64/:8.75                       DOMDPNEPNCNMBOMCOMCNLBOMCOMCPMAOLDQNDROBOM                                rҺn]hphd]vq^wrS[ZVfcXkhU^]U^]U^]U^]V_^V_^V_^V`_Wca                )# ("@6"LA1n^5xf6xfEVS@PM9HE9EC8DB4@>3<;2<:0661870540540551651762873984:9AVQDYTP|Y8FC8FC3A>2=;0=;/<9/97.;9.86                             DNMDNLCLKBMKBLKBLJAMK@LJBMK@NL@MKBOM@NL                                    rѸ_ìu^gXql_~wOXWT`^RZYR[ZXmiS\[T]\Va_U_^VbaYfdV_^Xdb             /(1**_Q0k[:oCVR=OL9IF:IF5B@5@>2><2:91:9076076/440540540551662773885>=CZUHb\Sg8EB8EB3A?1><3A>/>;0=:.=:.:7                              
EPNDOMBNLCMLBLKAMK@KIAKJ?LJ?JH@LJ                                    p˴[^a_`aZ}v[~wYwqWmi\}wYjgS\[XlhWhfVb`XecV`^                /($"K@"K@%PEJ^ZAQN2<:1;9/76.65.32.22.32.33/430540651764=;6B@C[V=PLKxn=NK9HE6DA1?<3A>.<90=:-;8                         CNLBNLCMKBMKAMK@KIALJAMK?KI@MJ                                    \ʱ^¬_mZ]X}uX{tUlgYwqTda\rmVebS[ZXlhYmjVcaZjg            1* =47/&THOC)ZM6zh9JG9HE7DB2><2=;/87/86.64.43-43-11-21-22.32/43054166277398?QM:FDJrj>PM7HE7GC1A>3C?.>;0?<,;8                               CNLCMKBLK@KI?IH@JH@JI>HF?IH                                      Y\_sae^c\|a`~xay_ytVec[wr[wrWhe             )#0)?5^O1m]9HD4B?4A>0<:1<9.97.65-54,22,32,00,10-11-22.32/44055287388Gf_@[VU~t@PM6GD6EB1@=2@=.>;,;8/=:                             DNMCNLALK@KJALJ@LJ>JH                                     [ŭ^`qn_Z}^Xyr\}\upZpl[qlT`^YqmZvp        *$ )#2*?5K@6CA1>1@=0?;,;8                            AKIAJI@JI?IG>HF                                       qɳ^axȴx°d`d^mfbzd\rn\~w              >5% 4- ?5A7E;9m1>;.97/:8+53-64*10*21*0/*10*..+0/+0/,00-11-22/430552769JG;MJOrjb;,=9.=:*96                               BLJ@IH                                         uѺxһb¬z͹{ʶtbd`mjc~h         $ ,%?5C9+75,64*21)10(.-(/.',+(-,(,+(,,)--).-*/.+0/,10-21.330552775<;>MJD^Y@TP9KG2DA3D@.?<,=9.=9                                                                        tηtɴ_ttm^`\gd^ys           )#  # ;2>4PC"jZ+43'/.)1/',+'-,&**'++'+*'++(,,)-,).-*/.+0/,10-22/44166:IFFb\B\W9LH7GC1B>1A=,=9+<9,;7                                                                           _˲yվb{ι{˷uceaol             .'OBH=(0/&,+&,+%*)%*)%))%*)&**&+*'++(,,)-,).-*/.+00-11.330542778B@:FDLvm7IF6GC0B>1A=+<8*:7,;8                                                                            \tDz_st`_a]_f           +$0)7/VH%.,%+*%**$))$)(%)(%))&*)&**'++(,,)-,*..+//,10-21/43165388:KHBYU=OK4GC4EA.@<0?<+<9,<8                                                                            _ǰyҼbz˷{ʶecfad          % & 
*$)#/((ZMNA$*)#&&#('#'&#''$('$((%)(%))&**'++(,,)--*..+0/,11.322<:2779KGB^XOvn7JF2EA3C@-?;.=:)96+:7                                                                              \^_scb_b^            *$80#*)#''#('"&%#''#'&#''$('%)(%))&**'++(,,)--*/.+00-21.331653887A@CTP@RN5HD5EA.@=,=:.>:):6                                                                             wѺbĮzϹxƳfgcg                @77/!$$!%$!%%"%%"&%#&&#''$('%)(%))&**'++(-,).-+//,10-22/44277499@PLWyq;MI3EB3D@-?;.=:,;8(85                                                                            vθaxȴabd_            !.(@7$OC!%% $#!$$!%$!%%"&%#&&#''$('%)(&*)&+*',+)-,*..+0/,11.32055287,=:.=:)96                                                                                aɱcư}Ծeeh             =46/#" ## $#!$$!%$"&%#&&#''$('%)(&*)'+*(,+)--*/.+00-21/431663987=OK5GD1C?1A=,=9-=9(85                                                                              a¬{Ҽfîh®           *$  ;3:2"!""#" ##!$$!%$"&%#&&#''$((%))&**'++(-,*..+0/,11.320552875;;=GE7JF3FB3D@-?<+<9-<8(96                                                                           xιd­d{ɶ            3+!!""#" ##!$$!%%"&%#'&$('%((%))&+*(,+)--*..+0/-21.331653886<;>GE=OK4GC4DA2A>,<9.=9(84                                                                               x̷dyŲ          *$E;! !!""#" $#!$$"%%"&&#'&$('%)(&*)'+*(,,)--*/.,00-21/431664987=8LH7HD4EA.@<,=:.=9)85                                                                                  eDZ           J?! "!"" ## $$!%$"&%#&&$''$((%))&**',+(-,*..+0/,10.320542874999B@9MI9JF5EB0A>->:.<9*96(74                                                                                        5- !!%$#" ##!$$!%%"&%#'&$('%)(&*)'+*(,+)--*..+0/,11.321762884;:8?>@MK:NJ:KH6HD1B?.?<0?<+;8)85                                                                                          ! 
!!""#" $#!$$"%%"&&#''$((%)(&*)'++(,,)--*/.+0/-11.321763:95=<8A@=GE>PL;LH4FC1B?3B?0?<+96*96                                                                                          %! "!"" ## $$!%$"&%#&&$''$((%))&**'++(,,).-*/.,10-11.430553;:4;:6?>;EC>PL;LH4EB2C@3C@1?<-=9+:7,:7                                                                                             !!""#" $#!$$"%%"&&#'&$('%)(%*)&**'++(-,).-+0/+00-43.431:83;96@>6?=:EC>HF:NJ7JF5GC6FC4DA/@<->;.;8-:7                                                                                               ! )'"" ## $$!%$"%%#&&#''$((%)(&*)'+*',+(-,)..*/.,11,11.43/652;93=;5@>8DA9DB?MJ@MK@SO:MJ9LH7JF7HD6FC3B>0@=-<9/<:-96                                                                                          !!""#" $#!$$!%%"&%#'&$''$((%))&**'+*(,+).-*..+21,21.54/650761;92<:4@>5@>7DB7DA8EC:KG:IF;MJ;LI9KG:MI8JF:KG9JG7FC6FB1A>0A>1@=0>;,:7*96                                                                                        *$"!"" ## $$!%$"%%#&&#''$('%)(%))&**'++(,+)-,).-*//+10-43.430970874?=4?=6B@7DB8EB:IF9GD:IF:JG9IE:KG9IE7FC4FB2B>1A>3B?1>;0=:,96*64                                                                                        "" ## $#!$$"%%"&%#'&$''$((%)(&*)&**'++(-,)..+10+10-53,32.75.54/970871;95A>4?=6DA5B?4B?4DA3A?4DA3DA1@=4A>2?=3A>1><-:8-<9-86                                                                                    #" ##!$$!%%"&%#&&#''$('%((%))&*)&+*'++(,,).-).-,21,21.75.54097/98/:80=;0:91><1><1?<2A>3?=4A>3A>3@=4C?0?;0=;/;9/=:/97/97.97,64+64                                                                                     $$!%$"%%"&%#'&#''$('%((%))&*)&**'+*',+(,,*0/*/.,32+32,53-76-64.97.75.:8/:8/970=:/:81=;1=:0:80<:/97-:7-:7,75                                                                                      "%%"&%#&&#''$('$((%)(%))&**&+*'++',+(-,(-,*/.*//*0/,32+11.75-43.65.76.54/97.64/86-75,53-75,43-64-53                                                                                   
"&%#&&#''$('$((%)(%))&*)&**'+*'++(-,(-,)/.*10*10+32+10,54,43-64-75,43-86,53-65.75-53.86-43                                                                                         #''$('$((%)(%))&*)&**&+*'++',+(,,(,,(-,)--*0/).-+21+10,31-54,21.75,42-64,54+21,54                                                                                             $('$((%)(%))&*)&**&+*'+*'++(,+(,,).-)--*0/).-*10*/.*0/+10*/.+21+//+10+10+//                                                                                                      %)(%))&*)&**&+*'+*'++(,+(,,(-,)-,).-).-*0/*/.*//+21*//,32+0/,21                                                                                                                   &**'+*'++',+(,+(,,(-,)-,)--).-).-*..*..*/.*/.+//                                                                                                                    #           (,,(-,)-,)--).-).-*..*..*/.                                                                                                                         &!'!                                                                                                                                          .'910)   -&3++%                                                                                                                                             80E:E;4,   ("'"                                                                                                                               6.90?690&   $!    $                                                                                                                            5-+%3+902*!  *$1*)#    )" +$ "                                                                                                                            :2+$.'=4D:;2+%'!1*>5>5-&  $ )" "                                                                                                                    !LA>58/F<%VI%UHA7.'+%5-:1.'    ! !    !                                            
                                                            +dU)^P K@I>%VI&YKF;)#  '!'!    % +$ &     !                                                                                                      1q`1q`&YKA7A7G<=4$ .'/(%  '!6.<2:0 +$   #                                                                                                                bR/m]'[M;2/(6.<30)! -'C9!LAE;905-=4F<>4-&                                                                                                       SE)_Q'ZLC92*90 K@"NBC991A7$SG)^P&WJD:5-1*5-5-&    !                                                                                                              L?PC*aS'[N!LA!LA)`Q.k[*aS"PDH=#PD&YK$TG?6*$!'!#  *#4+/( !                                                                                                 SEUG0o_3we/l\*aS-iY2tc.k[$TGA7?6H=I>;3'! ,%4-.(")#5,=37. $                                                                                                    fVcS6|j;t7l.jZ*aS-gX+cU"OC:14-?6 J?H=80+$-&<3 I>F;5-)#)#2+91.&                                                                                                          %}h"r_6~k;t7m,gW$TG%WJ(\N$RFB8;2G='ZL)_Q#QE=45->5 I>F;4," $,%+$   $                                                                                            (r$ye2tb3we3ve+cT"OC#QE*bS,fW'ZL"OC%TH,eV.k[(\N?6-&-&7/91.'  #0)6.-'    *# +$                                                                                                 'o"ta,eV+eV0p_.k[(]O(]O1q`6}j1q`(\N$TH(]O*aS$RF4,  -';2:10)+%2*?6G<>5("   ! !                                                                                              #vc n\*bT*`R3we8n3we/n]5zh9q2uc&XKE:@6G<B7/'"& ;2#PD$SGH==4;3B8F<:1                                                                          jY!p^2sb0p_:rA=43,4,5-+%  !,&'!                                                                              kY%~i>y:r>zC=y/m](]O\MJ>:1@5WIfV^OJ>@6H<TFSEB7.' 
# % *#-' 2*;22+                                                                                     #wc)uF?};u=y9p+cTL?ZKhWaRRESEeT"r_fUJ>9/;1F:I=>4.' '! ,%8/<21) $1*6.)#                                                                           'p+zG>{4yg6|i7~kcSXIjY&k${gcSWI_PhW@6/'3+E9PCL??57.;1E:F:5-   %   '"&!                                                                           *xBC;u0p_4yf&k%}i!o]$ye)s&laQJ>M@XJUGC87.@6UGcS]NK?=3;1?4;1 '!  *# ("  &!1*)#                                                                           +yyF?|3ve2tc8o7~k*bSG=G=([N.k[,gW%UH I>!MA%UH#PD5-  ,&8/2+" 1*5,-%           [L2q`udvs^olXifRc`N_\K\YHYVFVSDTQ "&!                                                                          ,|JIL,}%|h&l*w?};uEJ?|/m\,eV2tb3ve+bT#QE&XK0p_"q_gVPCA7@6C8>4&!#<3 I>D:2+! !,&-&"      \M3tcrdwt]nkYjgUebQa^M]ZKZWIYVFUSETQCPN@OL                                                                          /PHG*w%~i)u0H?}DE8o)_Q*`R"q^%{g kZZL^O!n\!p^]MB74,7.?5<2 ,%  "3+F;L??5-'   "    "%   5-  7.TFpgxu]pmXifSebO`]N^[K[XIXUGVSCTQAQN                                                                          1OC*w+{*v.QI;t=y@}#ta_P kZ(p)t"r`ZLZKeTbRL@9/7.D9REOB<2 +$ *#6-B7@60)    % )#)#"          PC_pm]ol^pm]nl`pmaqn`pm_pnarodtqdvsfxuiywj{xqxpexu`qn[liVfcPa^M]ZIYWGXUEVSCSPBRO                                                                           0K@~+z0.LPD4ygNB^N_ONAB8H<XJ_PSE8/ "  +$7.7.-& #  %.'2*-& $         %2q`\M[miXifXjgYkhXif[li[li[kh[li[li]mk_ol^nk_olbrpcspeusjzwk|yso~exu_qnYkhWgdSc`O_\M\YJYVHWTFUR                                                                           -GD/40IJ@}#wc,|1+y$xd%|h(r#wcSE!L@)_Q3we2ra(\N#QE&WJ+cT*aS J?,& !6.G<G<=40(.'1)0) '!     +$  4,!I>"l[WgdWgdUfcTdbTecTecVecWgdWgdVfcVheWgdZigZjhYifZkh]mj^lj^nlapnbrpfvtjzxm}{vzl|gzvaroZliUgdRc`N_\M\YKZWHXUFTR                                                                           DH035IDJ+{*u12)t m[7~k=w6|i)_Q)^P3ve8n/m\$RFH=#PD([N&XKE:1*-':2!MA#QEG=900)/(/('!    
& +$ $      % ("+%H<7|jTfcSdaSdaSebRc`UebUebTdaUebRc`SdaSdaUdaVfcTdbUebXheYgeXheZjg\ki^mk_olcqodtriywk|ysx{tk|exubsp]nkYifUebO_\L\YIYVFVS                                                                            CN53PEF30,}1/9p0o^;tC9q-iY.jZ5{h2ra%VI>5@6%TG,eV*bS#QEB9A7 I>"MBB8/(%&!/(6./(   +%6.3+         >55-8lRdaQb_Qc`Qa^Qa^Sc`Ra_Sc`Rb_Pa^Qb_P`^Qb_Sc`Sb_TdaRa_Rb`UdaUc`UdbVecYgeYif[jh]li^nlaqnfurhyvm~{m|nol{eyu`ro\mjXifUebRb_N^[L[XIYV                                                                            GS61MGN7/+yKF8n8oEF7~k*aR\MeTYJA6:1J>aQiX]NI==3<2>48/ ("  !0)@6C85,   #*$# +%$      ?6 G=%A7)[N7}kQc`Qc_Rb_Sc`Ra_Rb_Rb_Q`]O`]N_\O_\O_]P_\Qa^Q`]O_\P`]P_\Ra_Sb_Q`^Rb`Tc`VdaVecWecZhf[jh\ki`nlbspgwthywj~zl|k~{gyvbtq_pmXifTfcQa^M^[JZX                                                                             NUŨ4.PN56+zBMI=xA-(p\MM@^O jY]MI=J>ZKhWaQK?907.?5D9;1 +$ %.'>4F;=3 %    (" %  +$-&   .'B91* D9,&?zSdaRa^Ra^Qa^P_\Q`]P_\P_]N_\M]ZN_\M][O_\P`]O^[P`]M][N^[N_\P^\Q`]Q_]O_\P`]R`^Sb_Ra_Ub_WecWfcZhf]li]mjapmctrhzwi|yi|xdyuctq_pm\liWgdTdaN_\                                                                              TæS1MWʫ352AJSI'm*u*w!n\SE`P$wd#vc^N#QE$RF*`R+bS#PD>5>5"NB)]O([ME;/(& -&9091+% -&1) *#   &    <2 )#E;0m]Rb_Qa^Rb_P`]N_\N_\M^[M_\M\ZL]ZL\ZL[XN]ZM[YN\ZN\ZKZXL\YKZXL\YN]ZN\YO^[M[YM]ZP^[P][P_\P^\S`^Tb`Tb_WecXgeZhf^lj_olcsqfxueyvcwtatp^pmZkiWheUdbQ`]                                                                             VȪ12UŨZб13QHR4)t${g)t'oiX1p_9p=x4xf%UHF;#PD)^P&XKH=C9#PD*`R+cU#PD3+ & 1*2+.(+%,&0)1*+%!       *$905-#MB ,wOa^O`]N_\N_\M]ZM]ZM]ZL[YL\ZM\YN]ZM]ZM\YM]ZL[XK[XK[XJZWK[XLZXM\YL[XKZWK[XKZWN]ZN\YM\YN]ZP][Q_]P_\Q_]Ub`UcaXec[ig\ki`nlaspburextcur`qn]nkZjgTda                                                                               Uŧ/X˭Yϰ6.X̮SL4.$xe'oG=x4xf6~kyIE7l2ra7m$zfhWVHZK jX!m[XJ<2.&4,?5>44,3*90C8G;;1 &  & 2*2* % 0(6- )#                &! 
VHNB%SGQ`]P_\P`]O^\O_\O_\N]ZN^[M\YM\YM[YLZWJZWIXUIYVIXUIXUIYVJXVKZWJYVKYVKZWHWUIYVHWTHXUJYVJXVIXVIWTJXVLYVLYWLZXLYWO\ZO\ZP][Ta_Ta_Wdb[if\ki_nlbsp`so_qn]nk[li                                                                                N6]׷45\Ե9-1MAFH%{g`P lZ&k$yedS^OiX!n\\M?55-=4!LA"MA@780;3G<"NBD:,&   "7.A6:0 )#   % $  !.& ("      " *$.' 4,PD/)O`]O_\N_\N_\M]ZM^[L\YL[YK[XKYWKZXJXVJYVJYVIXUIZWIXUIYVKZWKYVKZWJXUJYVIXUHWTGWTGVSGWTIWTJXUIWUHVTHWTHVSJXVJWUIWUJXULXVN[XMZXP\ZQ^\R]\Ua_WecYge^lj_qn`robtq`pn]mj                                                                               P8Wʬ7Wˬ;Ʀ2.TåH@~-~'ncSiX'l&k0n^(\O+cT0o^+dU"NBG<$RF*_Q([NE:.''!2+C9 H=>52*-&1*:2=44-("  "#"!           /(  !<4)ZMQa^P_\P`]O^[M]ZM]ZL[XL\YKZXK[XK[XJYVJZWIXVKZWKZWJXVKYWIWTJXUIWTIVTGVTGUSGWTGVSGWTGWTIWUIYVHWTIXUFVSGVSGVSIVTIWUGUSHWTIVTKWUJXVKWUNZXNZYP[ZS_]Ua`XdbZhf]li`omaro^ol                                                                                VȪYί5^ںWɫ8KT¥O%}i+x+z"q^2sb;t;u,fV H>$RF-gX,dU#PDF:OBWIC8+$ #,%=3C9?57.4,6-5- *$  (!9080)#   #"                   =44udO^\P_\P^[P_\O]ZO^[N]ZN]ZN^[M[YM]ZL[XL[XL[XKYVKZWJXVKYVJXVJWUJXVGVSHWTGVTGVTGWTGVSGWTHVSHWTHVSHUSHVSETQFURETQHVSGUSHUSFURFTQGUSHTRIUSIUSIUSLXVLXVO[YQ][R]\Ua_Xfd\jh]lj`qn                                                                                 9¢VȪ8]׷8VǩO5%}i)rJC81* # $/(:17. ("   '!5,7. 
(" +$+$    & "         $(#          (#/)#:1)#N][O_\N][N^[N^[M\ZM][L[YL\YL[XKZWKZXJXVJYVIWUIWUIXUHVSHWUHVSHVTHVTGVSHWTGUSGWTGVSIWTIWTHVSHWTGUSHVTETREUREURETRHWTGURGVSGTRETQESQESPHUSGSQGTRGTRIUSKWUKVTNYWNZXPZYT_]WcaZfd\jh`om                                                                                 :Ĥ[Դ\׶5]طN6*v@~LB4we#vb&k iXXJaQ!l[aQ J?5.:2"MB%UH!LAC9G<$QE&WJ I>.(  /(A8D:91,&%#$$!#)#-&+%#       *$             .'3+.'901*  NBP^[O][P^\O]ZO^[O]ZO]ZN]ZN[YL\YKZXK[XKZWJYVJZWIXVJYWIXVIXUIYVHWTHXUHVTHWTIWUIVTIWTHURHVSGTRGURGURESQFURESQEURETQEURGUSFURGURFSQDTQDRPDSQDRPFSQFTQERPFSQGSQHTRHTRITRLWULWVOZXQ][U_^Xba[hf                                                                                 7>ѯ]ظ[ҳVǩ4MAL*v kZ$xe(o4wf+cT/k[3vd.iY#PDG<$SG+aS)]OG=4-2*>5 I>E;91/(2*907.-% ! &!0)2+)# *$6.7/'" !,&)#       -&,&       %  "  )#!H><4*$H=K?M[YMZXM\YM[YM[YM\ZM[YM]ZL[XN]ZN\ZN\YN]ZM[XM\YLZXLZXLZXKYVLZWJXUKYVJXUJWUJXUIVSIWUHUSIVTHVSHUSHVTFTQFUSFTRFURFURETRFURESQGUSFSQFTQFSQFSQDSPCRPDTQCROFTRESPFSQDRODQOERPFQOGSQFQPHSQJUSKUTNXWPZYR[ZVa_[ge                                                                                  6[ӳ9ã]ֶ1SB/*u jY:r46..'0):0@690 % %90A78/ &  *# ("  $1)/'  # ("    '!   "#     ?6?6% !   ("I=2+!KWUMYWLXVNZXMYWN[YMZXN[XN[YNZXL[YLZWL[YLZXLZXKZXKYWKZXJXVJYWKYVKYVKYVJWUJXVIVTJWUIVTIUSIVTGSQGUSFSQFTRFTRFSQFURFSPFURESQGURGURFTQGURFSQGURFTQDTQDSPDSQCSPCROESPDQOERPDPNCQNBOMCPNEQOEPNEQOEPNHSQHSQISRLVUNWVQZYT_]                                                                                  \ֶ=ͫZѱ1Vƨ*u.A1p`9p"q^L@B7XIcSSFB7@6L@&WJ I>;2>5"MB&WJ#NC80$ ,&;3?6800)/(3,7/3,)#! &!+%*$'"&!)#+%&!    #'"&!"      -& !   !  
#("   #*$ D:91(#L@JUSKVTJUSJVTJUSKWUJUTKXVKWULYWKXVM[XM[XM[XN\YMZXN\YMZXM[YM[XLZXKZWJXVJZWIXUJYVIXUIWUIXUHVTHXUHVTHWTGVTGUSGVTGTRGVSFTRFURFURHURHVSGSQGUSFSQGTQFSQFRPDSPDQODSQDRODSPCROCROESPDRPESPDROBROBPNBROBPMDRODPNCPNDPNEPNFQPFPOHQPJTRLVTOXW                                                                                    <ͫYϰWɫ6(pI=x lZ&j"o]VH,fV5zh3tc&WJB8D:#OD!J?:12+;2!K@$QEE;4, '!*$4,904,-&,%1*7.2* " '!90=4/(  & 2++%  0)4,! ,&3,#  !(#   +%/(+%#   $'",&-&'"0)OBJVTITRIUSISRJUSISRJUSISRKVTJUSLXVKWUKXVJWUKXVKXVKXVKXVKWUKYWJXUMZXLYWLZWLZWKYVLZWKXUKYWJXUJXVJXUIVTJXUIUSIWTHUSHUSHUSGTRHUSGSQHTRGRPGSQGSQFRPGTQFRPESQEROESQESQESQETQDSPDTQERPESPDQOERPDRODSPBQOAQNBQNAOMCQOCOMCQNANLBOMDOMDNMDOMENMGQOIRQKTSMVU                                                                                   >ѯ_ܻ6>zK#vc"q^=w1p_*aR0n^3tcSE=4B8VH[LNA<34+5,7/2* (" '!2*B7K?C9.'  "3+903+)# $ & (" &    ")"*$ & !       # & %   *$,% $    .(6/.')#1*:2<37yhM[YKYWLXVJUSJVTISQHTRHRQITRHSQIUSITRKWUJUSKVTJUTKWUKWUKWULXVKWUJXVJWUJYVJWUJXVJXUIXUIXUIWTIXUJWUJXVJWTJWTIWTIUSIWTHURIVTHTRHURHTRGSQHURGRPGTRERPERPERPEQOESPEQOESQDQOERPFSPFRPFSPEQOESPEROETQESPCSPBQOBQNAPMAOMAOMBPMCQNBOLAOM@MKANLBMKCMLCMLDNLFPOGPOIQP                                                                                    ;ȧ7A-~,fV8m3tcL@L@WIG<2*5-"LA&UI!J?7/0);2!K@$PDE;5.-'1*:2=44,*$)#0*9191.($#+$1*-'  1*>591!  !6.7/&!  +%3+$("@7:1  0*2+  /(;2*$ !>5-'#J@ZLXifRb_Qa^O^[KYVJXVIUSJWUITRJVTITRHTRHSQHTRHSQHTRHTRIUSIUSJVTKWUJVTKXVKWULYWKXULYWKXVKYWIXUIWUIXVHVTIXUHVTHWTHVTGVSGVTGURGVSFTRGUSFTQFTRFTRFSQFTRERPFTQERPESQESQGSQGURFRPGURFSPGTQFSPFRPFRPCPNCROCPNCROBPNBQNAPMAQNCQOCROCROBQNBPN@NK@NL@LJBNLALJAMKBMKDMLDNMEMLHPO                                                                                    VȪ,|0n^#sa%zfTF([M0n^+bT#OD"MB&WJ'XKE; )"  )":1C9@65-/(0(5-4,*#  *$90?54,  #4,8/0( $   $ $ !  $*$,% &    ! # $'!,%("  2*90&! 
)# E;A8*$B9)#=4 bSn}_olVheSb_N^[N]ZLXVJWUHTRHTRHSQHSRHSQHSQHSQHSQISRHSQHTRHTRIVTIUSIWTIUSIWUIUSJWUJVTJWUJVTJVTJWUIVSJWUIUSIWTHUSGURGTRFTQFTRFRPFTRERPFSQERPEROERPFQOGSQEPNFRPEPNFQOFQOEPNFRPEONEQODOMDQODPNDQODRODRODSPCQOESPDPNDROCPNCROCQNCQNBPM@OL?NK?NK?NKAMKANL?KI@LJAKJBLKCLJDLK                                                                                     )q#ta+bT6.K?RD90 (" ("3+8/2*.'4+@6$PE"MB80   /)A7C991.(("'!("&!$% ,&5.:12+$ *$3,/(#("5.7/$ '!?6>5&!  )$4-+%#+%6.0)&!1*?6-&& G< D:2m^QEk}z[mjUfcSc`M\YLZXKWUJVTHTRGRQGRQGQPGRPGRPHTRHSQIUSHSRIUSHSQIUSHSQHUSHTRHVTHUSHVTHUSHVSHVTHUSJWUIUSIWUHUSIVTHUSHUSHUSGTRHUSGSQGURFRPGTQFRPFRPFSQFQOFSQEPOFRPDPNEQOEQOEPNEQODPNERPDPNFRPEQOFRPERPEQOERODPNDRODQNDSPDROAQN@NL@OL?MK?OL?NKAOLANK@MK@MK?JH?KI@IHAKIAJI                                                                                     G<6.  ,%>4B8<22*+$ )")#)" $ "*$7/A7?60)  *%90:1.'!)#6.91-'  2+>580'"  !*%+%'!("*$ $   #2+1* " +%:1/( .'"I?4-3,*[N!F=F;"G=3i{x[miVfcRa_O^[K[XJXVJVTIVTHSQGTRFRPGSQFQOGSQFQOGSQGQPHTRHRQIUSHSQIUSITRIVTIUSHUSHUSGUSHVTGURGVTGTRGVSFTRGUSFURFTRHVSGTQHUSFSQGURFSPGSQERPEQOERPDPNERODOMEQODOMDPNDONEONFQOEOMFRPEPNFRPFQOFSQDRPDQOCQOCOMCQNBOMBQNAOLAPM@OL@QM?OLBQN@NK@NK?LI?LJ=JH>IG?JH?IG@JI@IH                                                                                     33t6{:    &!)#'"$"% % '! #    "+$0).'(! !   !  " & -&2*-&  #4,5-&!  
&!0)(" 1*@74-3,>54-7/C:*$% K?NB#J@4rb.xϿgxuYjgUebO`]N]ZLYWIWUHTRIUSGRPHSQFQOGSQEPNFRPFPNFRPFQOGSQFRPGTRHTRIUSHUSIUSIUSHTRIUSHTRHUSGSQFTRFRPFTQERPESPERPEQOERPDPNERPDPNDQODPNDPNDPNDOMDPNDNMDPNDNMFQOEOMFPOFPNEPNFQODOMEQODOMEQODOMDPNDOMCOMCPNCPNCRPDRODROBOMCQNBOMBQNANLAPM@NKAOL=LI>MJ=JH?LI?JH?IH?IG                                                                                    +z2p6|NK>MJ>NK=KH=LI>JH?KI>HF=HG                                                                                     FIRUOT<=Y   "    '!*$'"     $ $ "  #("("  % 8/;2'!*#>46.% -'-',&@7&SG70F;SGOC4qa4rb(xfóŴƵǶɵȰđbtqZkhQb_P^\L\YKYVJXUHTRFSQFRPEQOEQOFQOFQOFQOFQOFQOFQOFQOGRPGRPFRPFQOFSQFRPFSQERPFTQERPFTQERPGTRGTQGTQGTQFSQGTQFRPFTQEQOFSQEQOERPEQNEPNEPNDNMDOMCLKDNMCKJDMLCLKDMLDMLDMLEOMEMLFONEMLFONDMLENMDNMDOMDQODQOCQOBOMBPMANLAOM?MK?OL?OL>OL=LI=LIKH;FD{LQP9=:T[YA        "   1*:2/(    3,5-)#(#,&&!"*$5.2+" 3,?6+%#5-E;80>5YL3,=52l]"dU3m^/v±±ııĩaroVgdQa_O^[K[XIWTIUSHTRFRPERPDONEQODOMEPNDNMFQOEOMFQOEPNGRPFQOGTRFRPGURGSPFTQERPESQEROESPERODRPDQODQODQOCPNDQOCOMCPNCNLCPMCNLCOMCNLCMLCNLCMKCOMCLKDOMDMLEONENLEOMENMENMEONENMFPOENMFQOEONFQOFQOFQOFQOEOMDOMDOMDRPCQNCPMANK?OL>NK@OL?NK@OL?MJ?MJ=JG;IG;ED                                                                                    "ubB::9RRY@>\Z]E     $ "    ,&/))#  % ("   #2*4,  :1E:0) !3,B96/!G=*YM;2 ]O#hX%ND;m5¿¿`qnVgdPa^N][KYVHVTFSQGTREPNFRPDNLDONCMKDONCNLDQODOMERPDPNERPEPNFRPEPNFSPEQOFSQFQOFSQFRPFSPDRODQODROCPNDROCOMDQOCOMCQNCNLCPNBNLCOMCNLCNLDOMCMLDOMCLKEOMCKJDNLDKJDLKDLKELKEMLEMLFNMFMLFONFMLFONFNMFPNFQOFQOEQODPNDPNBNLCPMBPNBQN@MJ@NK@OL@OL>MJ>LI;IF;HF                                                                                   2ucC;8|MUT>?ZYYE]]      +%,%   1*7/("  /(2+.(0*2+%!4-(WJ$MB % 
H=A8-aS5rb/dV-r.u5aroUfcRb_L\YKZWHXUFRPESPEONFRODOMEQODOMEQOCNLDPNCNLDPNCNMDQODOMDQODOMDQOEPOFRPEQOERPEROEQOERPEQOERPDPNERPDPNERPDPNEQODOMDPNDOMDOMDNMCMKCMLBKICLKBIHCLKCJIDMKDKKEMLENMFMMGONFNMGPOGNMGQOGONGQPGQOGQPFQOENMDOMDOMCQOBOLANL@MK?NK>NK>OK=MJMJ>NK=MJ|8|;<=ZT@B[\EDaGIb        *$6.)# ">5;2)$!,&.(.("G>*YM5.91$jY6.-^Q;n-q@v(p__omUebPa]JZWGVSHVSGURDPNCQOBNLCNMBLKCNLCLKCNLCNLDOMDOMDPNDPNEQNDPNEQNCOMCPNCOMCPNBOMCQNBNLCQNBNLCPNBNLBPMAMKBOMAMKBNLALKALKALJALJBLKBKJBMKBJICLKBIICJIDKJDLKELLFMLGNMGONHPOIPPIQPJQQJRQJRQJQQIRQIRQISRGQOFPOEOMEQOCOMCOMBNLBOLAOM?LI?MJ?OL;MI:IF                                                                                kY/n];u7x;VTV>BX\DD^_GaKLc   !!    % +%2++&  ,'$LB=4  A7LAG=.cU?7!C:,pD:M@w5]nlTdaM^[L[XGXUFSQFSQCNLCPMAKJBOLAKJBNLALJBOLBMKDPMCNLDPNCOMDQNCOMDQOCNMDQNCNLDQNCNLBPMAMKBOMAMKBOMAMKBOMAMKANLAMKANLAMKAMKAMKAKJBLJAIHBJIAHHBJICJIDKJDKKELLFMMGNNHOOIPPIQPJRQKSRKSSLTSLTSLTSKSSKSRJRQIQPISQHRPGQODNLCPNBNL@LJ?MK@MK?NK=KI>MJ=MJ                                                                              gV_O5{h:sVSS@>XZDB_\G__LbNf (#  *$(#  % ("  !;380  "F='SH0* A8/eWH> ZMPE2i[@v,l?s<|k]mkTdaK[XK[XFVSDQNERPCNLCPMBLJANLALJBOLALJBOMALJBOMALJBNLALJBNLBMKDPNCNLDQOCOMDROCOMDROCOMDQOCOMCPNBNLCOMBNLBNLBMKALJALJAJIAKI@HGAJI@GGBJIBIHCKJCJIDKJELKFMMGNNHPOIQPJRQKSRLTSMUTNVUNWVNWVNWVNVVNVUMUTLTSKUTIQPGQPFPOEPNCMKCOM?LJ=JH=NK=KH>NJ                                                                             jX^N-hY?|S<>>S[BC[]EH]JbMMhSU   & $  4-3,  (#/)70"H>?6 #"`RQE1+2j\4n_1gYOCK+WL,XL_nlQb_K]YIWUFWTDSPCQODQNCOMBNLBOLBMKBNL@KIAMK@JIAMK@KJAOLALJBOMAMKBPMAMKBOMBNLCPNBNLCPNBNLCPMBNLBOMBNLBNLBMKALJALJAJIAKI@IHAJI@HGAKIAHGBJIBIHCJIDKJELKFMLGNNHPOIQPKSRLTSMVUNWVPXWPYXQZYRZYRZYQZYQYXPXWOWVMVUKSRKUSHQPFONDPNBMKBNL@MJ?\WCD\^H`aMdOhVmoafҿ $#  (#&!  
;3)$  D:;3#>5![N=5;43-"[N&gX,YN!RG8qb!ND`pmM^[IYVHYVDRO@NL?LJ>JH=IG>JH>HF@LJ?IGAMK?JHAMK?JHANK@KIANL@KIANL@LJANL@LJANL?KI?MJ?KI?MJ?KI?MJ>KI?MJ>JH?LJ>IG?JH>FE?HG>ED@GF?FEAHGAHGBIHCJJELKFNMHOOKUTKSSMVUPXWR[ZT]\W`_Yba[dc]fe^hg_ih`ji`ji`ji_ih]gf[edYbaV_^R[ZOYXKSRISQEOMAJI                                                                      +dU/n].l[QClZ7m=?XXADY_F_aJeNfSkZrïvdz{ʷ   ! )$-' !$ .(*% "-]P,[O %!!ZM;3*UJ-ZN PELA"?8)MDfusL^[FYUDTQDUQ=JH>NKIG>JH?KI>IG@MJ>IG@MJ>HF@MJ?IGANK?KIAOL@LIAOM@LJAOL@LIAOL?KI@MK?IG@LI>GF?JH=ED?IG>ED@IH>FE?HG?FE@GFAHHCJIDKJFMLGONIQPKSRNVUTa_S[ZVb`Xa`[dc`ol`jibmldonepofqpgrpfqpfpodonbmk`ji\feYbaU_^PYXMVUHRP                                                                    WI.k[.l[*cTOB9r0p_VVBBZ[FF`IccOeSmn^cӿ          )$#  '"1+"D</* 6/I?!)RH0aT(#D;#A9;42-$OEI\XCWTAPM=MJIG@MJ>JH@NK?JH@NK?JH@MK>JH@MK>IG@MJ>HG@LJ=GE?KI=FD?JH=ED?IG=ED?IG>DD?EE@FFAHGBIHCKJELLGNNIQPKSRP^\PYXWheV`^Zed_mk`jidpofqpitrkvulxwmyxnzxnzxmywlwvjutgrqdon`ji\fdW`_R\[MUT                                                                  UG^O.l\,gW(]OXI:shWABV\DF^HK`OdRhXqîvɴzɶ          %!=5  "91 ($3-&LBE; D;&"&G>5.'G?'E=IZVAPMJH?MJ>JH?MJ>IG?MJ=HF?KIJHIGIGDD?EE@GFAHGCJIDLKFNMHPOMYWMVUSc`Ua_Zge]mj`kidsqfqpl{ymxwo{zr~|s~tuuut~r~}p|zmxwitsdon_jhZcb                                                                SEZK,hX,gW*bS(^PfU3weAVYDD^[I`LNgQkY[wɵh®                     (#  0)A8 +&)SH.) 
%!5/.)!QF%"$A9D;!D<:IF6B@8FC6B@6@>8DB7B@7@>:FC9CA;GE9EC9DB:GE9CA;IF:EC;JG;GEJHDD?EE@GFBIHCKJEMLJXVJRQSb_R`^\okYdbezvbpneqpomxwvtvxz{|||{zxur~}nzyits                                                             OBUGXI)`Q)_Q)`R+eVkZiXXBD\\H_bLePhTpîsŰy˶                     $  2,$G>  3,)% ;4&""%"2-=6/*$<6#40+640:84GC1;:3?=4=<5?>7CA7A@:FD9DB8DB9FC9EB:IF9EC:IG9FD;JG:FD;JG:FD;IF:FD:HF:GD:GE:GE:FD;GE:EC;FD:@?;A@;BA=ED>DD?FEDRPBIIIVTHRQN[XQc`YnjXfc]soezvevthvtop|ztwz} ""                                                          G<QCSFPB$TG%XJ*bSgV.l\(qB\ZGH_LaOQglZre                       +& *%0*4/$!+')%1,93,HA/C>/EA0=:2><1>;4DA4?=6B?6@>8DB7@?9FC9EC8EC9GD8EC9HF8DB9IF8DB:IF8DB:HF9EC:HE9EC:GD9EC9CB9A@9?>:A@;A@0:8/982?<2>;4?=6C@5?=7DB7B@8EC8EB8EC9FD8DA:HE8B@8GD7B@8GE8CA9FD8B@8CA7>=8>>:A@:A@DD@GFIa]FONYwpLUT`y[to`zueyui~zm}zvuz~ "!   "                                                       @6G<RDSEI=A7?5#QE.k["ta!p]EF[_IdLggjXpvdz            -)(% )&,(*>9%/-,85->;/:80;9/:81?<1><1=;5B?4?=6DA5A?7DA6B?6B?7C@6@>8EB5@>7EB6?>7CA6<;8@?8>>:@?@RO>EDLohK\XLa]]~vUmib~x]pmg|xl{"! "!!!                                                     ?6I>OBVHSEF;<3=4$TGeT!p^&nZ^HabNfTiY]fį      "" #! /+$51",*%0.)42)52+75.:7/971=:/;9/;91?<0<:2@=3><4A>4?=4><5@>3=;5A?B[U8@?7=5B8'[MeUfU]GI`McRhWosİjʵ            "!" " #!$"%#&#)&!-+'20&0.)41+75*53+75.:8.86.:7/:86GC3=;%""%" """"#!# "$  #""!"  " "                                             @6C9I>#RERETFOBE:D:!MA*cTdS\LH`JNcRjV[xͷ          !"!!!# !!#  # "# %""$!!!#!# "# # !  $!!"#  "# #" #! # ""                                      <3?5C9I>"OCL?M@I=E: K@%XJ-jZaQUG^LbPfUo­rİ}Ծ          "!" # !!$!!!!! !$!"%!# """" $ !!# !#$ !##  #""!" "!                                5-;2@6D:H=J>B7A7A6C8"PD)_Q.l\]NUG_eOiUotȳ            ! ""# !# ""%!!""!"!$ "#%!"# $  #" """!" !                .'6.=4C9G<H=G<:06-9/@5#RF)`Q,gWZK[LNfRm[a             !! "!# $ !$!# "%!"!""! $ ""$ "#$ !#$  #""!! !!          '!1);2C9I> K@ J?F;8/0(4+>4#RF'[M'\NWIeTfTlqïzѻ               "" # "!$ !#" "!"  
$ !"$ !#$ !## #""!! !             ",%8/B8 K?"PD"PD!MA@5:02)4,A6$TG$SGG;SF o]iY^            # ! $ !"$!!#$!!"!" "  $ !"$ !#$  ## ""!!!               ("3+?5I>#QE$TG#SF!MB@5:05,9/H<&ZL"PD;1PC6lo­_             "!# "# #""  "  " !!##!## #""!!                  $-&7/C9 LA#RF$TG#QE K@=37.5,:1M@*bS#SF5,M@6~kxϹ              !""" # " $ "!"  " "!!## """!!!!!!                !& .'8/B8I>!NB!NB J?D97.1)0(7-K?+eV&YK8/MA3we                   " "! #!!$ !""!"!!!  !"!"!"!"!!               !%+%3+;2A7D9C9?691 /' *# +$1)A6)`Q'[MD:SE0q`%!                    " " !# !#  "$ !!!    "!"!" !               !% +$0)5-7.6.3,/( (! & ' -&9/K?$UHF<WI/n]"                      "!""" ""    !                       #&!)#+$+%+$*$ & & )" /'8/D9 K@B8UG/n]#                          !!!"!"" #"! !                    !"#$& ("*$ )" ,%1(7.>4F:!MA>4J>.l\$                       "!"! #! #! #                   !& &!& $#"#% )"-& .'4+:1@6F:I=K>E:@6+dU"                               "  "  "!                   " ' +#1)1).'+$& $"$(".'1)9/@5F:I=J=G; L@ K?K?!                        !!!!"                     $ *#0(3+:1804,.''!!  ")# .&7.?5E:G;E9?4@6A8QD#                       ! ! ! !!                     " )"/(4,7.=4:15--&#     $ .'8/>4@6<24+1)0)!MB!                       ! !                         # *#/(4+6-:16./(&      # .'6-905- -&'!#*$ o\!                                                   $ )# -& /'2*.''!         )"3+8/7.0( & "%6.                                                      # % %'!"         
("4,<2=38//(,%+$5- LA                                                                  !$#       )"5,=2?5<26-/'/(4,F;(]O                                                             Release_v0.3/kernels/compiler_clz_int.cl000066400000000000000000000001701223142177000205200ustar00rootroot00000000000000kernel void compiler_clz_int(global int *src, global int *dst) { int i = get_global_id(0); dst[i] = clz(src[i]); } Release_v0.3/kernels/compiler_clz_short.cl000066400000000000000000000001761223142177000210730ustar00rootroot00000000000000kernel void compiler_clz_short(global short *src, global short *dst) { int i = get_global_id(0); dst[i] = clz(src[i]); } Release_v0.3/kernels/compiler_convert_uchar_sat.cl000066400000000000000000000002221223142177000225650ustar00rootroot00000000000000kernel void compiler_convert_uchar_sat(global float *src, global uint *dst) { int i = get_global_id(0); dst[i] = convert_uchar_sat(src[i]); } Release_v0.3/kernels/compiler_data_types.cl000066400000000000000000000030531223142177000212160ustar00rootroot00000000000000/* OpenCL 1.1 Supported Data Types */ __kernel void compiler_data_types() { // built-in scalar data types (section 6.1.1) bool b; b = true; b = false; char c; unsigned char uc; uchar uc_2; short s; unsigned short us; ushort us_2; int i; unsigned int ui; uint ui_2; long l; unsigned long ul; ulong ul_2; float f; half h; size_t sz; ptrdiff_t pt; intptr_t it; uintptr_t uit; // built-in vector data types (section 6.1.2) // supported values of $n$ are 2, 3, 4, 8, 16 for all vector data types #define VEC(sz) char##sz c##sz; \ uchar##sz uc##sz; \ short##sz s##sz; \ ushort##sz us##sz;\ int##sz i##sz; \ uint##sz ui##sz; \ long##sz l##sz; \ ulong##sz ul##sz; \ float##sz f##sz; #if 1 VEC(2); VEC(3); VEC(4); VEC(8); VEC(16); #endif float16 f_16 = (float16)(1.0f); f_16.s0 += 1; f_16.s1 += 1; f_16.s2 += 1; f_16.s3 += 1; f_16.s4 += 1; f_16.s5 += 1; f_16.s6 += 1; f_16.s7 += 1; f_16.s8 += 1; f_16.s9 += 1; f_16.sa += 1; f_16.sb += 1; f_16.sc += 1; 
f_16.sd += 1; f_16.se += 1; f_16.sf += 1; f_16.sA += 1; f_16.sB += 1; f_16.sC += 1; f_16.sD += 1; f_16.sE += 1; f_16.sF += 1; float8 f_8; f_8 = f_16.lo; f_8 = f_16.hi; f_8 = f_16.odd; f_8 = f_16.even; uint4 u_4 = (uint4)(1); // Other built-in data types (section 6.1.3) image2d_t i2dt; image3d_t i3dt; sampler_t st; event_t et; } Release_v0.3/kernels/compiler_degrees.cl000066400000000000000000000001771223142177000205030ustar00rootroot00000000000000kernel void compiler_degrees(global float *src, global float *dst) { int i = get_global_id(0); dst[i] = degrees(src[i]); } Release_v0.3/kernels/compiler_displacement_map_element.cl000066400000000000000000000006641223142177000241040ustar00rootroot00000000000000kernel void compiler_displacement_map_element(const global uint *in, const global uint *offset, int w, int h, global uint *out) { const int cx = get_global_id(0); const int cy = get_global_id(1); uint c = offset[cy * w + cx]; int x_pos = cx + c; int y_pos = cy + c; if(0 <= x_pos && x_pos < w && 0 <= y_pos && y_pos < h) out[cy * w + cx] = in[y_pos * w + x_pos]; else out[cy * w + cx] = 0; } Release_v0.3/kernels/compiler_double.cl000066400000000000000000000003721223142177000203340ustar00rootroot00000000000000#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void compiler_double(global double *src, global double *dst) { int i = get_global_id(0); double d = 1.234567890123456789; if (i < 14) dst[i] = d * (src[i] + d); else dst[i] = 14; } Release_v0.3/kernels/compiler_double_2.cl000066400000000000000000000003731223142177000205560ustar00rootroot00000000000000#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void compiler_double_2(global float *src, global double *dst) { int i = get_global_id(0); float d = 1.234567890123456789f; if (i < 14) dst[i] = d * (d + src[i]); else dst[i] = 14; } Release_v0.3/kernels/compiler_double_3.cl000066400000000000000000000003221223142177000205510ustar00rootroot00000000000000#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void 
compiler_double_3(global float *src, global double *dst) { int i = get_global_id(0); float d = 1.234567890123456789f; dst[i] = i < 14 ? d : 14; } Release_v0.3/kernels/compiler_double_4.cl000066400000000000000000000003101223142177000205470ustar00rootroot00000000000000#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void compiler_double_4(global double *src1, global double *src2, global double *dst) { int i = get_global_id(0); dst[i] = src1[i] + src2[i]; } Release_v0.3/kernels/compiler_event.cl000066400000000000000000000001651223142177000202030ustar00rootroot00000000000000__kernel void compiler_event(__global int *dst, int value) { int id = (int)get_global_id(0); dst[id] += value; } Release_v0.3/kernels/compiler_fabs.cl000066400000000000000000000001721223142177000177730ustar00rootroot00000000000000kernel void compiler_fabs(global float *src, global float *dst) { int i = get_global_id(0); dst[i] = fabs(src[i]); } Release_v0.3/kernels/compiler_function_argument.cl000066400000000000000000000002011223142177000226000ustar00rootroot00000000000000__kernel void compiler_function_argument(__global int *dst, int value) { int id = (int)get_global_id(0); dst[id] = value; } Release_v0.3/kernels/compiler_function_argument0.cl000066400000000000000000000002041223142177000226630ustar00rootroot00000000000000__kernel void compiler_function_argument0(__global int *dst, short value) { int id = (int)get_global_id(0); dst[id] = value; } Release_v0.3/kernels/compiler_function_argument1.cl000066400000000000000000000002571223142177000226740ustar00rootroot00000000000000__kernel void compiler_function_argument1(__global int *dst, char value, short value0, int value1) { int id = (int)get_global_id(0); dst[id] = value + value0 + value1; } Release_v0.3/kernels/compiler_function_argument2.cl000066400000000000000000000005501223142177000226710ustar00rootroot00000000000000__kernel void compiler_function_argument2( char8 c, uchar8 uc, short8 s, ushort8 us, int8 i, uint8 ui, float8 f, __global 
float8 *result) { result[0] = convert_float8(c); result[1] = convert_float8(uc); result[2] = convert_float8(s); result[3] = convert_float8(us); result[4] = convert_float8(i); result[5] = convert_float8(ui); result[6] = f; } Release_v0.3/kernels/compiler_function_constant.cl000066400000000000000000000002401223142177000226120ustar00rootroot00000000000000__kernel void compiler_function_constant(__constant short *c, __global int *dst, int value) { int id = (int)get_global_id(0); dst[id] = value + c[id%69]; } Release_v0.3/kernels/compiler_function_constant0.cl000066400000000000000000000002761223142177000227030ustar00rootroot00000000000000__kernel void compiler_function_constant0(__constant int *c0, __constant char *c1, __global int *dst, int value) { int id = (int)get_global_id(0); dst[id] = value + c0[id%69] + c1[0]; } Release_v0.3/kernels/compiler_function_qualifiers.cl000066400000000000000000000004241223142177000231310ustar00rootroot00000000000000/* test OpenCL 1.1 Function Qualifiers (section 6.7) */ kernel void compiler_function_qualifiers() __attribute__((vec_type_hint(float))) __attribute__((work_group_size_hint(4,1,1))) __attribute__((reqd_work_group_size(4,1,1))); kernel void compiler_function_qualifiers() { } Release_v0.3/kernels/compiler_gather_register_file.cl000066400000000000000000000004141223142177000232340ustar00rootroot00000000000000__kernel void compiler_gather_register_file(__global uint *src, __global uint *dst) { __gen_ocl_force_simd16(); int id = (int)get_global_id(0); const int x0 = src[id]; const unsigned short index = get_global_id(0); dst[id] = __gen_ocl_rgather(index, x0); } Release_v0.3/kernels/compiler_gather_register_file0.cl000066400000000000000000000004221223142177000233130ustar00rootroot00000000000000__kernel void compiler_gather_register_file0(__global uint *src, __global uint *dst) { __gen_ocl_force_simd16(); int id = (int)get_global_id(0); const int x0 = src[id]; const unsigned short index = 15 - get_global_id(0); dst[id] = 
__gen_ocl_rgather(index, x0); } Release_v0.3/kernels/compiler_gather_register_file1.cl000066400000000000000000000004601223142177000233160ustar00rootroot00000000000000__kernel void compiler_gather_register_file1(__global uint *src, __global uint *dst) { __gen_ocl_force_simd16(); int id = (int)get_global_id(0); const int x0 = src[id]; const int x1 = src[id+16]; const unsigned short index = 2*get_global_id(0); dst[id] = __gen_ocl_rgather(index, x0, x1); } Release_v0.3/kernels/compiler_geometric_builtin.cl000066400000000000000000000003661223142177000225710ustar00rootroot00000000000000kernel void compiler_geometric_builtin() { float x = 1, y = 2, z = 3; z = dot(x, y); z = cross(x, y); z = distance(x, y); z = length(x); z = normalize(x); z = fast_distance(x, y); z = fast_length(x, y); z = fast_normalize(x); } Release_v0.3/kernels/compiler_global_constant.cl000066400000000000000000000032201223142177000222260ustar00rootroot00000000000000constant int m[3] = {71,72,73}; constant int n = 1; constant int o[3] = {3, 2, 1}; constant int4 a= {1, 2, 3, 4}; constant int4 b = {0, -1, -2, -3}; struct Person { char name[7]; int3 idNumber; }; struct Test1 { int a0; char a1; }; struct Test2 { char a0; int a1; }; struct Test3 { int a0; int a1; }; struct Test4 { float a0; float a1; }; constant struct Person james= {{"james"}, (int3)(1, 2, 3)}; constant struct Test1 t0 = {1, 2}; constant struct Test2 t1 = {1, 2}; constant int3 c[3] = {(int3)(0, 1, 2), (int3)(3, 4, 5), (int3)(6,7,8) }; constant char4 d[3] = {(char4)(0, 1, 2, 3), (char4)(4, 5, 6, 7), (char4)(8, 9, 10, 11)}; constant struct Person members[3] = {{{"abc"}, (int3)(1, 2, 3)}, { {"defg"}, (int3)(4,5,6)}, { {"hijk"}, (int3)(7,8,9)} }; constant struct Test3 zero_struct = {0, 0}; constant int3 zero_vec = {0,0,0}; constant int zero_arr[3] = {0,0,0}; constant float zero_flt[3] = {0.0f, 0.0f, 0.0f}; __kernel void compiler_global_constant(__global int *dst, int e, int r) { int id = (int)get_global_id(0); int4 x = a + b; dst[id] = m[id%3] 
* n * o[2] + e + r *x.y * a.x + zero_struct.a0 + zero_vec.x + zero_arr[1] + (int)zero_flt[2]; } // array of vectors __kernel void compiler_global_constant1(__global int *dst) { int id = (int)get_global_id(0); dst[id] = c[id%3].y + d[id%3].w; } // structure __kernel void compiler_global_constant2(__global int *dst) { int id = (int)get_global_id(0); dst[id] = james.idNumber.y + t0.a1 + t1.a1; } //array of structure __kernel void compiler_global_constant3(__global int *dst) { int id = (int)get_global_id(0); dst[id] = members[id%3].idNumber.z + members[id%3].name[2]; } Release_v0.3/kernels/compiler_global_constant_2.cl000066400000000000000000000007671223142177000224640ustar00rootroot00000000000000constant int m[3] = {0x15b,0x25b,0x35b}; constant short t[5] = {0x45b,0x55b,0x65b,0x75b,0x85b}; constant long n[3] = {0x15b,0x25b,0xFFFFFFFFF}; constant long p[3] = {1,1,1}; constant long s = 1; __kernel void compiler_global_constant_2(__global int *dst, int e, int r) { int id = (int)get_global_id(0); dst[id] = m[id%3] + t[id%5] + e + r; } __kernel void compiler_global_constant_2_long(__global long *dst, int e, int r) { int id = (int)get_global_id(0); dst[id] = n[id%3]*p[1] + e*s + r; } Release_v0.3/kernels/compiler_global_memory_barrier.cl000066400000000000000000000011601223142177000234140ustar00rootroot00000000000000__kernel void compiler_global_memory_barrier(__global int *dst, __global int *src) { src[get_local_size(0) * (2 * get_group_id(0)) + get_local_id(0)] = get_local_id(0); src[get_local_size(0) * (2 * get_group_id(0) + 1) + get_local_id(0)] = get_local_id(0); barrier(CLK_GLOBAL_MEM_FENCE); dst[get_local_size(0) * (2 * get_group_id(0)) + get_local_id(0)] = src[get_local_size(0) * 2 * get_group_id(0) + get_local_size(0) - (get_local_id(0) + 1)]; dst[get_local_size(0) * (2 * get_group_id(0) + 1) + get_local_id(0)] = src[get_local_size(0) * (2 * get_group_id(0) + 1) + get_local_size(0) - (get_local_id(0) + 1)]; } 
Release_v0.3/kernels/compiler_group_size.cl000066400000000000000000000012721223142177000212500ustar00rootroot00000000000000__kernel void compiler_group_size(__global unsigned int *dst) { uint idx = (uint)get_global_id(0); uint idy = (uint)get_global_id(1); uint idz = (uint)get_global_id(2); uint size_x = (uint)get_global_size(0); uint size_y = (uint)get_global_size(1); dst[idz*size_x*size_y + idy*size_x + idx] = idz*size_x*size_y + idy*size_x +idx; } struct xyz{ unsigned short b; unsigned short e; unsigned int o; }; __kernel void compiler_group_size4(__global struct xyz *src, __global unsigned int *dst, unsigned int num, unsigned int c) { uint idx = (uint)get_global_id(0); if(idx>=num) return; struct xyz td = src[idx]; for(unsigned x = td.b;x<=td.e;x++) dst[td.o+x] = c; } Release_v0.3/kernels/compiler_hadd.cl000066400000000000000000000002221223142177000177540ustar00rootroot00000000000000kernel void compiler_hadd(global int *src1, global int *src2, global int *dst) { int i = get_global_id(0); dst[i] = hadd(src1[i], src2[i]); } Release_v0.3/kernels/compiler_if_else.cl000066400000000000000000000003601223142177000204650ustar00rootroot00000000000000__kernel void compiler_if_else(__global int *src, __global int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; if (dst[id] >= 0) { dst[id] = src[id+1]; src[id] = 1; } else { dst[id]--; src[id] = 2; } } Release_v0.3/kernels/compiler_insert_to_constant.cl000066400000000000000000000002421223142177000227750ustar00rootroot00000000000000__kernel void compiler_insert_to_constant(__global int4 *dst) { int4 value = (int4)(0,1,2,3); value.z = get_global_id(0); dst[get_global_id(0)] = value; } Release_v0.3/kernels/compiler_insert_vector.cl000066400000000000000000000003521223142177000217460ustar00rootroot00000000000000__kernel void compiler_insert_vector(__global int4 *out ) { int tid = get_global_id(0); int4 output = (int4)(0, 0, 0, 1); //black if (tid > 16) { output = (int4)(tid, tid, 1, 1); } out[tid] = output; } 
Release_v0.3/kernels/compiler_insn_selection_masked_min_max.cl000066400000000000000000000003621223142177000251310ustar00rootroot00000000000000__kernel void compiler_insn_selection_masked_min_max(__global float* src, __global float* dst) { int id = (int)get_global_id(0); if (get_local_id(0) > 5) dst[id] = max(src[id], src[7]); else dst[id] = min(src[id], src[10]); } Release_v0.3/kernels/compiler_insn_selection_max.cl000066400000000000000000000002351223142177000227410ustar00rootroot00000000000000__kernel void compiler_insn_selection_max(__global float* src, __global float* dst) { int id = (int)get_global_id(0); dst[id] = max(src[id], src[0]); } Release_v0.3/kernels/compiler_insn_selection_min.cl000066400000000000000000000002351223142177000227370ustar00rootroot00000000000000__kernel void compiler_insn_selection_min(__global float* src, __global float* dst) { int id = (int)get_global_id(0); dst[id] = min(src[id], src[0]); } Release_v0.3/kernels/compiler_integer_builtin.cl000066400000000000000000000010601223142177000222400ustar00rootroot00000000000000/* test OpenCL 1.1 Integet Built-in Functions (section 6.11.3) */ __kernel void compiler_integer_builtin() { int i = 0, i1 = -1, i2 = -2; unsigned u = 1, u1 = 2, u2 = 3; i = CHAR_MAX; i = abs(u); i = abs_diff(u1, u2); i = add_sat(i1, i2); i = hadd(i1, i2); i = rhadd(i1, i2); i = clz(i); i = clamp(i, i1, i2); i = mad_hi(i, i1, i2); i = mad_sat(i, i1, i2); i = max(i1, i2); i = min(i1, i2); i = mul_hi(i1, i2); i = rotate(i1, i2); i = sub_sat(i1, i2); long l = upsample(i, u); i = mad24(i, i1, i2); i = mul24(i1, i2); } Release_v0.3/kernels/compiler_integer_division.cl000066400000000000000000000002171223142177000224210ustar00rootroot00000000000000__kernel void compiler_integer_division(__global int *src, __global int *dst, int x) { dst[get_global_id(0)] = src[get_global_id(0)] / x; } Release_v0.3/kernels/compiler_integer_remainder.cl000066400000000000000000000002201223142177000225350ustar00rootroot00000000000000__kernel void 
compiler_integer_remainder(__global int *src, __global int *dst, int x) { dst[get_global_id(0)] = src[get_global_id(0)] % x; } Release_v0.3/kernels/compiler_julia.cl000066400000000000000000000065551223142177000201770ustar00rootroot00000000000000typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod #define time 1.f inline vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } inline uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) inline __attribute__((always_inline)) float jinteresct(vec3 rO, vec3 rD, vec4 c, float *ao) { float mz2,md2,dist,t; float res=1000.0f; vec4 z,nz; int update_ao = 1; *ao = 0.0f; for(t=0.0f;t<6.0f;t+=dist) { if (update_ao) *ao += 1.0f; vec3 p=rO+t*rD; // calc distance z=(vec4)(p,(c.y+c.x)*.3f); md2=1.0f; mz2=dot(z,z); for(int i=0;i<9;i++) { // |dz|^2 -> 4*|dz|^2 //if (mz2 <= 4.0f) { md2*=4.0f*mz2; // z -> z2 + c nz.x=z.x*z.x-dot(z.yzw,z.yzw); nz.yzw=2.0f*z.x*z.yzw; z=nz+c; mz2=dot(z,z); } if(mz2>4.0f) break; } dist=0.25f*sqrt(mz2/md2)*log(mz2); if(dist<0.0005f) { res=t; break; } t+= dist; } return res; } #if 1 inline __attribute__((always_inline)) vec3 calcNormal(vec3 p, vec4 c) { vec4 nz,ndz,dz[4]; vec4 z=(vec4)(p,(c.y+c.x)*.3f); dz[0]=(vec4)(1.0f,0.0f,0.0f,0.0f); dz[1]=(vec4)(0.0f,1.0f,0.0f,0.0f); dz[2]=(vec4)(0.0f,0.0f,1.0f,0.0f); //dz[3]=(vec4)(0.0f,0.0f,0.0f,1.0f); for(int i=0;i<9;i++) { vec4 mz = (vec4)(z.x,-z.y,-z.z,-z.w); // derivative dz[0]=(vec4)(dot(mz,dz[0]),z.x*dz[0].yzw+dz[0].x*z.yzw); dz[1]=(vec4)(dot(mz,dz[1]),z.x*dz[1].yzw+dz[1].x*z.yzw); dz[2]=(vec4)(dot(mz,dz[2]),z.x*dz[2].yzw+dz[2].x*z.yzw); 
//dz[3]=(vec4)(dot(mz,dz[3]),z.x*dz[3].yzw+dz[3].x*z.yzw); // z = z2 + c nz.x=dot(z, mz); nz.yzw=2.0f*z.x*z.yzw; z=nz+c; if(dot(z,z)>4.0f) break; } return normalize((vec3)(dot(z,dz[0]),dot(z,dz[1]),dot(z,dz[2]))); } #endif __kernel void compiler_julia(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy); vec3 color = (vec3)(0.0f); vec4 cccc = (vec4)( .7f*cos(.5f*time), .7f*sin(.3f*time), .7f*cos(1.0f*time), 0.0f ); vec3 edir = normalize((vec3)(p,1.0f)); vec3 wori = (vec3)(0.0f,0.0f,-2.0f); float ao; float t = jinteresct(wori,edir,cccc,&ao); if(t<100.0f) { #if 1 vec3 inter = wori + t*edir; vec3 nor = calcNormal(inter,cccc); float dif = .5f + .5f*dot( nor, (vec3)(0.57703f) ); ao = max( 1.0f-ao*0.005f, 0.0f); color = (vec3)(1.0f,.9f,.5f)*dif*ao + .5f*(vec3)(.6f,.7f,.8f)*ao; #else color = (vec3)(0.5f,0.0f,0.0f); #endif } else { color = (vec3)(0.5f,0.51f,0.52f)+(vec3)(0.5f,0.47f,0.45f)*p.y; } vec4 gl_FragColor = (vec4)(color,1.0f); OUTPUT; } Release_v0.3/kernels/compiler_julia_function_call.cl000066400000000000000000000064231223142177000230710ustar00rootroot00000000000000typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod #define time 1.f vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) float jinteresct(vec3 rO, vec3 rD, vec4 c, float *ao) { float mz2,md2,dist,t; float res=1000.0f; vec4 z,nz; int update_ao = 1; *ao = 0.0f; for(t=0.0f;t<6.0f;t+=dist) { if (update_ao) *ao += 1.0f; vec3 p=rO+t*rD; // 
calc distance z=(vec4)(p,(c.y+c.x)*.3f); md2=1.0f; mz2=dot(z,z); for(int i=0;i<9;i++) { // |dz|^2 -> 4*|dz|^2 //if (mz2 <= 4.0f) { md2*=4.0f*mz2; // z -> z2 + c nz.x=z.x*z.x-dot(z.yzw,z.yzw); nz.yzw=2.0f*z.x*z.yzw; z=nz+c; mz2=dot(z,z); } if(mz2>4.0f) break; } dist=0.25f*sqrt(mz2/md2)*log(mz2); if(dist<0.0005f) { res=t; break; } t+= dist; } return res; } #if 1 vec3 calcNormal(vec3 p, vec4 c) { vec4 nz,ndz,dz[4]; vec4 z=(vec4)(p,(c.y+c.x)*.3f); dz[0]=(vec4)(1.0f,0.0f,0.0f,0.0f); dz[1]=(vec4)(0.0f,1.0f,0.0f,0.0f); dz[2]=(vec4)(0.0f,0.0f,1.0f,0.0f); //dz[3]=(vec4)(0.0f,0.0f,0.0f,1.0f); for(int i=0;i<9;i++) { vec4 mz = (vec4)(z.x,-z.y,-z.z,-z.w); // derivative dz[0]=(vec4)(dot(mz,dz[0]),z.x*dz[0].yzw+dz[0].x*z.yzw); dz[1]=(vec4)(dot(mz,dz[1]),z.x*dz[1].yzw+dz[1].x*z.yzw); dz[2]=(vec4)(dot(mz,dz[2]),z.x*dz[2].yzw+dz[2].x*z.yzw); //dz[3]=(vec4)(dot(mz,dz[3]),z.x*dz[3].yzw+dz[3].x*z.yzw); // z = z2 + c nz.x=dot(z, mz); nz.yzw=2.0f*z.x*z.yzw; z=nz+c; if(dot(z,z)>4.0f) break; } return normalize((vec3)(dot(z,dz[0]),dot(z,dz[1]),dot(z,dz[2]))); } #endif __kernel void compiler_julia(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy); vec3 color = (vec3)(0.0f); vec4 cccc = (vec4)( .7f*cos(.5f*time), .7f*sin(.3f*time), .7f*cos(1.0f*time), 0.0f ); vec3 edir = normalize((vec3)(p,1.0f)); vec3 wori = (vec3)(0.0f,0.0f,-2.0f); float ao; float t = jinteresct(wori,edir,cccc,&ao); if(t<100.0f) { #if 1 vec3 inter = wori + t*edir; vec3 nor = calcNormal(inter,cccc); float dif = .5f + .5f*dot( nor, (vec3)(0.57703f) ); ao = max( 1.0f-ao*0.005f, 0.0f); color = (vec3)(1.0f,.9f,.5f)*dif*ao + .5f*(vec3)(.6f,.7f,.8f)*ao; #else color = (vec3)(0.5f,0.0f,0.0f); #endif } else { color = (vec3)(0.5f,0.51f,0.52f)+(vec3)(0.5f,0.47f,0.45f)*p.y; } vec4 gl_FragColor = (vec4)(color,1.0f); OUTPUT; } 
Release_v0.3/kernels/compiler_julia_no_break.cl000066400000000000000000000066161223142177000220350ustar00rootroot00000000000000typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod #define time 1.f inline vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } inline uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) inline __attribute__((always_inline)) float jinteresct(vec3 rO, vec3 rD, vec4 c, float *ao) { float mz2,md2,dist,t; float res=1000.0f; vec4 z,nz; int update_ao = 1; *ao = 0.0f; t = 0.f; for (int j = 0; j < 100; ++j) { if (update_ao) *ao += 1.0f; vec3 p=rO+t*rD; // calc distance z=(vec4)(p,(c.y+c.x)*.3f); md2=1.0f; mz2=dot(z,z); for(int i=0;i<9;i++) { // |dz|^2 -> 4*|dz|^2 //if (mz2 <= 4.0f) { md2*=4.0f*mz2; // z -> z2 + c nz.x=z.x*z.x-dot(z.yzw,z.yzw); nz.yzw=2.0f*z.x*z.yzw; z=nz+c; mz2=dot(z,z); } if(mz2>4.0f) break; } dist=0.25f*sqrt(mz2/md2)*log(mz2); if(dist<0.0005f) { res=t; update_ao = 0; } t+= dist; } return res; } #if 1 inline __attribute__((always_inline)) vec3 calcNormal(vec3 p, vec4 c) { vec4 nz,ndz,dz[4]; vec4 z=(vec4)(p,(c.y+c.x)*.3f); dz[0]=(vec4)(1.0f,0.0f,0.0f,0.0f); dz[1]=(vec4)(0.0f,1.0f,0.0f,0.0f); dz[2]=(vec4)(0.0f,0.0f,1.0f,0.0f); //dz[3]=(vec4)(0.0f,0.0f,0.0f,1.0f); for(int i=0;i<9;i++) { vec4 mz = (vec4)(z.x,-z.y,-z.z,-z.w); // derivative dz[0]=(vec4)(dot(mz,dz[0]),z.x*dz[0].yzw+dz[0].x*z.yzw); dz[1]=(vec4)(dot(mz,dz[1]),z.x*dz[1].yzw+dz[1].x*z.yzw); dz[2]=(vec4)(dot(mz,dz[2]),z.x*dz[2].yzw+dz[2].x*z.yzw); //dz[3]=(vec4)(dot(mz,dz[3]),z.x*dz[3].yzw+dz[3].x*z.yzw); // z = z2 + c nz.x=dot(z, mz); nz.yzw=2.0f*z.x*z.yzw; z=nz+c; 
if(dot(z,z)>4.0f) break; } return normalize((vec3)(dot(z,dz[0]),dot(z,dz[1]),dot(z,dz[2]))); } #endif __kernel void compiler_julia_no_break(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy); vec3 color = (vec3)(0.0f); vec4 cccc = (vec4)( .7f*cos(.5f*time), .7f*sin(.3f*time), .7f*cos(1.0f*time), 0.0f ); vec3 edir = normalize((vec3)(p,1.0f)); vec3 wori = (vec3)(0.0f,0.0f,-2.0f); float ao; float t = jinteresct(wori,edir,cccc,&ao); if(t<100.0f) { #if 1 vec3 inter = wori + t*edir; vec3 nor = calcNormal(inter,cccc); float dif = .5f + .5f*dot( nor, (vec3)(0.57703f) ); ao = max( 1.0f-ao*0.005f, 0.0f); color = (vec3)(1.0f,.9f,.5f)*dif*ao + .5f*(vec3)(.6f,.7f,.8f)*ao; #else color = (vec3)(0.5f,0.0f,0.0f); #endif } else { color = (vec3)(0.5f,0.51f,0.52f)+(vec3)(0.5f,0.47f,0.45f)*p.y; } vec4 gl_FragColor = (vec4)(color,1.0f); OUTPUT; } Release_v0.3/kernels/compiler_julia_no_break_ref.bmp000066400000000000000000006000661223142177000230500ustar00rootroot00000000000000BM66(                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""################################################################################################################################################################################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' (!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!)")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")"*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$
*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%ME1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#H@7脧[fb2+#2+#2+#TJ@pwpbpm2+#2+#2+#2+#jldߊ2+#򅱶2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$
3,$3,$3,$3,$3,$3,$3,$3,$3,$}gy3,$3,$A91]SH~3,$3,$s3,$jld_XN3,$]^W3-'3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%UMCinh3-%3-%3-%3-%3-%`\Snijb3-%3-%N]\kqk{_WL3-%UJ@UK@3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%agaVLBq|wq{uXMBv]`ZUNDq}xXOEUKA3-%3-%3-%3-%3-%3-%3-%akg\^Xo|lyuSLCmzv3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&xbpmј}4.&t}jngzUMC4.&4.&4.&4.&\XO|4.&a`Xj4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&``YD>7x\qq@92g{z4.&]TJZTJ[TJjoiKMHIB9af`uymwrZTKcd\VLAjqlhpkYTKXPFTJ?4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/
'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'rzcrp5/'5/'5/'·5/'5/'@:2nuoKB95/'5/'5/'5/'jrlw|5/'b_Ws~5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'XUL5/'ipi|OJAMC:LB9XNCa]Tt~y5/'5/'5/'`YOHNJ[SI`[Qc`W5/'5/'5/'5/'5/'5/'5/'5/'q{vu|z5/'_]Tkqj5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(zӠ60(60(60(60(60(glegtqb\Qhf]60(60(60(~pvo^TI60(YODzx60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(}dQH>]ZQşҌlsmXPFTJ?NF=]VKakf{D@:d_Vb]SQUPu}mqjikc`pn^aZUMCdg`eg_a_W60(60(60(b_U60(]XO60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)XNC71)71)71)71)71)tlph}71)71)71)xnqj71)71)}s71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)QLDlvq94-dg`71)71)71)71)71)71)71)71)}VLB\SIqxqw<5.fd[c^THGAs}x呸QPIVVOQQKpzu\WN_YOOF=A?9tzv{dbY71)s}w`[R71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71
)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*ÈZOD82*82*82*82*82*82*82*82*ZPEw{82*82*82*82*`yz^TI^UJ82*ZODq82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*]gc82*dhbϢ82*82*82*82*82*82*82*82*82*[`Zr}xTK@_WLVPGQVRaYOlogrijldtzYQGeplae^[ZSYXP[SI^XMPF<}dpma\Ri|z82*^XNb`W82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+_ZP^VK93+B:2t93+93+93+93+ZPE93+WMB_UI93+93+93+93+nun^TI93+johIB:93+r93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+[\U93+w93+93+93+93+93+93+93+93+_ieelgYheHA8PLDTSLVb_XND_WLSKA93+d]S_VKz93+v{_[R93+}ekd\ZSjnf[QFTJ@RXTzd`WQLCntm򆤤93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,XQGqysnsl:4,]d_:4,|:4,GF@:4,clgounQI?:4,:4,:4,:4,ZPFkkb:4,]UJa]T:4,w:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,cmiOME{:4,n:4,:4,:4,:4,:4,:4
,:4,:4,Z[T:4,RLDcic:4,:4,[SI_WMTKA:4,hf]>81qwp\QFx:4,ioi[ZR^WM]UJSI>]ZQankaYOnrkIHBs|vWOF:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-lnfID=;5-j[a\;5-PMFEA;]XO|v}wd_T;5-;5-;5-;5-gh``VJ;5-\TI^VKIOKゥ;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-^rr^\TWPFYRHYRHYQGt{r{a_V;5-;5-;5-B@:OJBbga;5-;5-ID;`kh\UK`XMVND=70pkldc]SzzX\Vbd]pvprysTKAWOEOVRge\qwpisnge\mwq;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.otmc^TӀr;6.;6.LD;VXQJB9]RGd\Rlkb;6.;6.;6.;6.pys;6.ecZjjaW`\␽;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.VPGl;6.;6.;6.;6.[SIZPE]VM;6.SQJu;6.;6.MKE[b]Y]XIE>ZQG{ZSJ]TI;6.;6.eaX;6.]VM;6.;6.;6.;6.;6.;6.luo{UMCWMCW`]^UJ`YOYUM 
\VL;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/t~x~aolk~|]`Y<7/<7/<7/XWOOIA[^X]SG<7/<7/<7/<7/<7/x}<7/kme<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/]_YfjcXODcbY`\SZPFq{vx<7/<7/<7/a]S<7/\SIWb_~<7/<7/<7/IF?lD<4SQIivr~|VMBfbXb`Wc[P^TH<7/<7/lmdcaX<7/<7/<7/<7/<7/<7/x{VNEWMCɋlwrz`YO<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80c^U=80KD<=80=80QG>]_WOFRH>OF91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91c^T~mzv>91>91>91ZVNYVNXSJahc>91ZPE>91u>91>91>91qun>91`VJ>91]SH|˚۪>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91cf_>91UKAԊĪhkdTI?XMBrzt;4-G?6I@7gf^otm>91;;6>91JTRczzi~>91>91>91>91>91ee\|YQF>91>91c^S>91\SI>91>91>91>91>91>91>91s{taZPXOEZQGe`Wo|wudaW]SH>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92
?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92ff^hf]kjb?92cmh?92?92RI?``YRI?goiqyr{gf]VOEsxq?92w?92?92`UJ?92?92c\Rɕҙ۬?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92SLB~B>6?92?92?92?92?92F>6A<5kpiRH>\TITSLURJdaX^UKa[QKA8SMD]c^F?7PKC?92?92?92?92?92?92?92WND^TIcaYhdZd]S?92?92jh_sxqgf]?92?92?92?92?92?92?92c]Td_UXND\VMz^SHp|wmzvc]Sii`?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3sdib@:3@:3@:3VNEac[TJ@|r|wfcYZVM@:3ʓ@:3sztrwobZP@:3יڬ@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3zi@:3@:3@:3@:3OH?YXP`e_dmhdokOZWUKA_XMgh`alhdlg_XNRQJgd[PH>RH>WQH_e_LD;RKB@:3@:3@:3@:3@:3aXNXOD~@:3@:3ii`@:3msk@:3@:3@:3@:3@:3@:3@:3@:3t~b[Q[XPXNCbb[jsm\TI@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4lyuZQGd_UA;4A;4A;4A;4]YQ\ZRZTKb[PbYNllcA;4ВA;4^UJA;4_UIۖլA;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4ĜЙ͂pA;4A;4NF=]^Wgpko{uz}}qXOE]UKWUMzoY^Y]TJVPFTJ?VLB__WPLDQH?ZXOA;4A;4A;4A;4\TJy}`[Rnned\QA;4A;4syqgbX^UJA;4A;4A;4A;4A;4A;4A;4A;4qxq}\VMYODorj]SHA;4ehac]Shf\A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<
5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5\SH^TIptlRJAB<5B<5B<5ULBbbZVNDeh`_UJaXLnnehB<5d`VgaW{pvo_VKqݓЪB<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5hpk֤RQJketrB<5B<5XWPgpkrzƒˈB<5WNDnrj^^Ve`VzX[U[VNVMBVLA]ZQ]`YRI@WPFB<5B<5B<5B<5aXNYODB<5B<5ji`B<5rztB<5B<5B<5B<5B<5B<5B<5B<5B<5nrj^TIZRG[SI~ig^B<5_aZ^THa^UB<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6|he[q{C=6C=6C=6WNDdf_UKAuC=6QUPf`UC=6ovC=6otlmlc~UOF~݌ĪC=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6GC<~C=6KE<_e_n~zxƘC=6C=6C=6C=6C=6C=6TJ@b\RktoC=6a[Qxpumflg^[RXODWLB^ZQdibTKAWOEC=6C=6C=6]WMaXMa\RbYNC=6C=6d\Ry|aYNC=6C=6C=6C=6C=6C=6C=6C=6b\R_^VZPEih_fbYOF=punhh_C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7bgaD>7D>7D>7D>7]WNcc[XOED>7vxcrpυD>7q[c^uzbXL^TIfd[zD>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7WQGD>7~NIAdmhq}ėD>7D>7D>7D>7D>7D>7BHEVhhD>7~[QF4.(C?8hf]pvnjsm_]TYOEWMB`\SdicULBXPFD>7D>7D>7fbXZQFzx{D>7D>7e_UD>7d`VD>7D>7D>7D>7D>7D>7D>7D>7D>7w{kkb]VMZPEe_UfbXo{v~v~wTMCD>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D?
8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8^WMSKBD?8D?8D?8TLCb`X^ZQ^YOD?8y}]TID?8sD?8v|ʍaZOryr`e_D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8sU[UWebPMEdplpzD?8MYVjD?8D?8D?8D?8LC:QKCTRJTSLq{ukldYSJKKDlmeC>7aYN{luo`\SYODYODbaXce^ULBYRHD?8D?8YRHXOEdbYD?8D?8u|uqskD?8D?8D?8D?8D?8D?8D?8D?8D?8b[Qlnf[QG]XND?8^VLD?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9a\RptmkvqE@9E@9E@9WMBef_YPFhjb`YOWPGE@9tqzhe\`VKĜkwrE@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9wE@9@>9ZTJfiblslnwqmtmhkd_[RVLAfkeE@9KJDMIATTMY[T\_Y^c]_e_[a\TLBeaWhleVZToqiJKEjjbkrl^YPYNC[RHfg`^]UVMC\WNE@9nnf\SHhdYE@9E@9gdZaYNE@9E@9E@9E@9E@9E@9E@9E@9E@9~y~_[RZPEc\Qnogirm【c[QVQHE@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:e_UkjaFA:FA:FA:ZRGgibXNCu|FA:wy_ZQopgFA:tzc[Pmoghf\ɚ٤FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:XRIFA:VWQuaaZp|weha_\T`]Ta_WbbYcc[eg`lsmzt|\ULt~SYT]gcfspl~{rtdyy`YOVNDXfckkbX_Zjh_LNI}hld]VLYNC^WMinhZULXNDcd\FA:nogZPFimfsyqFA:FA:{pumFA:FA:FA:FA:FA:FA:FA:FA:FA:FA:d^Tnph[RH]VMy}SMERI?_UJ_VKFA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:
FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;`d]NEJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>^UJ`VKzJE>JE>JE>[QGildZRGlphJE>JE>ie[sFIDo}faWy{JE>yaXṀLLEJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>abZlJE>ULBa`Xjpipztu~y}×đ{n|xagajrlghaNEJE>|ZRHJE>JE>b[QJE>h}|vc_V[QF]TIhiafibYPE[UKntm`YOJE>JE>d\Qz~fbXJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>b\Qde^\SHy~gcY{s`]TJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?bZOlkbKF?KF?KF?gql^WMjmeZQFw~KF?KDTRJljaf`VbZOixuMGAMGAMGAMGAMGAMGAMGAMGAMGAeaWoqi^VKd]SMGAMGA_\TzfbXeh`MGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHB`WLgf^qtlMHBMHBMHB[QGijba]Tc^UMHBji`\WNMHBj~|xy~rsjJOKd]SMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHB``XMHBVc`xNjLF>bhbvMHBZea{tMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBa_VSPHVLBikche\A:3GA9F@7>80v}b]S\QF`YOkohef^YQFa[RaWL_VLcYMli`MHBwy{e]Sd]SNH?XUMMHBMHBMHBMHBMHBMHBMHBd\Rb]T\SHbYNe_Tu{tnrkxMHBMHBMHB
MHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNIC`WLlmemvpoqiNICNICNIC\SHknf^WLii`NICd\QgcYcaXd[PNICk~|{kh^PYVhdZTRJNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICl|y쇣U\WvȋLG@emg|NICmIF?xWWPeyxNICNICNICNICNICNICNICNICNICNICNICb_V_`YZTJig^_YONICcZONIClhh_]TI]TIgf^mtm[TJVNEf`V]TIgaVe^SNICNICgbXOJA\a\RI?jrlNICNICNICNICNICNICNICmmdgh`]TId]RswoNICiqkif\giaNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDc[POJDOJDOJDVPG_XNlph\TIqwpOJDcidrumuzrhh`}OJDk}zd\PUcad]S`_WvƝRTNOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDgmflsm}Paao~OJDfniOJD]rrOJDsEE@OJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDgoiXOEOJD~[RGbYNe`Vy~D@9A>8OJDpxq`YO\RGc^Untna_V\SI]TInne_THNKCOJDX^XnogOJDlwrfkdRH>OJDOJDOJDOJDOJDhe[npg_WMd]ROJDOJD`^Vu|tbZPOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEnytlkammcuzsPKEHB:W_[lzvc^Uloh]TIzPKERI@cZOPKEjmfPKEk}ʍ~z{XkjbYNfkdtԛUXRPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEONH}PKEdmhzxfmhUcaxj{yPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKE73-PKEkpixe`VYODmrjf`VA?9c\QKICJICFE?}daW]SH`YNknfjmfZRHlpid\Rb]Se\QF>6KB8NJCf`Uf_Tf`VZULäUKAPKEPKEP
KEPKEc[Pc_U]TInnejg]PKElog[RGPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFwG?7QLFufd[kme^UKQLFQI@QLFrulxxƇQLFk~Ջe\QWjjd]S|j՞i|zQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLF``XQ[XrchbZjhf~~NLD~ˁi{yQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFGE>QLFQLFmytXOE`WLc`Wt{tMXVw~wsxqQLFjkc^UJ^UJgg^ovo]WMRKBc[P`XMie[}MG>LC9|e\Qc[P]XNQLFQLFVMCRQKQLFQLFQLFQLFw~xgh`]TIie[cZOQLFYQHu|ue_UQLF[RGQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGkmeUTMRMGRMG_XNiiahg_`YNRMGSMDRMGe\QhbW~RMGjۊoneFGBie\Yb]РqRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMG|RMGeni{]^WRMG\jguwYZSRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGMRNRMG62,:60RMGdd\waXM^WLjrld]RRMGRMG61*RMGrzs`ZO]SHd`VovocaYVNDxy^UJieZrQMEf`V^SG{PUP~fg_o{vRMGWOEwRMGRMGRMGd^Tnph_WL|RMGnpgswoqumRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNH`XMlnfd\QSNHoSNH\SHkmedaXd_USNHSOGSNHsulSNHvSNHh~}ՈxxEGAnnfjh^śvSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHqTOFzSNH]jge{zhwtSNHӒSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSWRJG@EBSKAYPEqysXMCA91UPJ[XO[YPUPJhdZikc^VKe^SoogUPJgbXrvnPMEUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJ
UPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKYQGf_ThbXUQKUQKUQK}d_Vosl_VLu~wUQKYSJUQKd_TUQKUQKd\Qkh^UQKf_TUQKtyrwsqUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKxUQKXWPu`b[UQKUQKYc`PNF{|]jgUQKUQKUQKZ[Tivqt~ŞUQKKTQpUQKUQKUQKUQKRLCy|a[QaXMki_UQKUQKUQKUQKw~b\R^TId`VpvohiaWPFd[PaYNnnd]^Wgpkqsjlj`bYNSNETJ@UMCb^Ui|yee]lzv][SgbXnog`YNfaWUQKUQKu|uqske_UUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLd\PVRL@;5VRLge\nrj_WL~VRLVMCVRLfbYVRLVRLf`VVRLjf[81*opggnNOIVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLyr|VRLiuqyjzVRLVRLWebowuvVRLVRLlsla_VPKC]_X|kyVRLVRLJJD_omVRLZrsVRLNNHG@7LC:@804.'UMCaWL^UKgaVLD5A91p}x~\SHu|d\QbXLTMC?91WSMWSMih`_VKaYNkldqys\WMd]R_VKkg]d[PWSMWSMrvnoJA8V\X_`YXQGUK@ZVMZQFz[RH[\UULBsmnefe\^VKd\QhcYWSMnxsc[Py}ff]WSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNkjaXNCcZOzUb_oxr^VKlnfijab[QXTNYTJqrjf_TiiaXTNXTN~tyrdZOH?6e]ReXTNetXTNXTNSME[`[\]VXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTN^c^scg`XTNXTNLTQ`uuhkd~˜ƚʝϜ̔^WMVMC\WNWNDA91KC:OG>QJAQKBQKBOIAJE=c\R\RHmqjy|hcYWUMPI?A:1B=5nrj`XM_WLih_s{uc`W_WLoofXTNXTNnndv|uy{emh_^VWOEUKAikdovoXTNemhUQHswpijb_WLopgd\PXTNXTNqskkh_Y\WXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTN
XTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOmmdrryrhdZYUOoYUO_\S_WLnqjfd[e`VYUO[XOe]RYUOy{loghaVYUOYpqYUOsz~aUHKC:e]SlYUOa}YUOYUOWPFdjd__WYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOiuqpYUOhvrjy؇YUOYUOYUOingyǧbxwqYUOYUOYUOYUOYUOYUO܋^XNWOE_]UWOETVP\_Y]a[[_YX[UIKEy~[RHhg^f`VjsngaWPH?ZXQQKBB;4t}vb[P_VKfdZrzshiafe]`XNebXYUOYUOYUOYUOxxkf\lkbU[Vhpj]ZQWMCVND\TJ[SIitp`e`UOFd^Sfg``XNif\YUOYUOif\y{YUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPig^wpqh?81ZVPZVPrzaYOpumc_Uhf]ZVP^\T{}ZVPotmZVP]stSTNq^{}~f_SNH?jf\qZVPa~ZVPZVPYRHflfa`XZVPITRZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPz\b]ZVPk}zo|wZVPZVPcbZYNCiiavz|~zoun`ZP\TIXODKD6ckf[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ^c^ZXP}[VQ[VQjoh}hia|yy{}~Ә}]VLee]singVNEac\XTL|hqly{a[QRNFqrj[WNVSKURJfbY_UJc]SoslowpVQHnneb[Q[VQ[VQw}vhcXc[P__WjtnaaYXPFVLB]UK[SI[VQVUMhdZe^Sd_V^WL[VQnzuz~eaW^WL[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WRKC:caYrtlƿ\WR\WRJC:fbXqwpaYOrwpozt`d^\WRtvn\WRe~y\WRvzraUIllc\WR\WRFHCY_Zm]VMhmffg^LF=flf\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WRbnjt{\WRksmws}wmrkx\WR\WR\WRNH?SMDZYQbgajuqptÇč_[R^\T\WR\WRTLC^c]scZO`XMcZNbtro{vUOFZ[TF@9ih__VKb[Qmphs|u\XOg`UaYOe]Qjf\QOH\WRf`VwyjuqXNDdkfjrl]XOWMC^WMpwpVLB|T[Vki_fe\_WLljajf\\WR\WRe
^TWTL\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XSxw}vrtk]XS]XSVXRig^qvoaYOx|gZSIxy]XSg`Ug`U]XSlv}]XSsulLB9u{s޳]XS]XSGLH}nr_YOhldhjbQKBhmg]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XShpjdus]XSdicv]XSipjfg^pxqqyrx]XS]XS]XSTLC\ZQfkdp}y{U]YMURe{z]XS]XSr{TLC]XSA;3ki_^VKzopgqumRKBUWQlnf`WMaYOklds}wa_V{aXNpqhYkkMH?ie[]XS[\Ughawjsmde]ZRHVMB[RHZQF]XSSTMija_WM}yz]XS]XSkg]hngt|v]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YTe]R^YTUTMZd_kkbpumbZP~oYQGd]S^YTtwn^YToi^YT^YTh`UND;}v{s^YT^YTO[Y^YTyt`\RgjblogSMDioi^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YT^YTjsnUWQuvv^YTbd]u~t{pwp\ssito}^YT^YT^YTYSJdf^pzu{ʭebkfGE>]gb^YT^YT^YT^YTVa^cqoF?8gqlD?8rvn]TIs}wf`Utzr^YTywLF>pumaYOaXNii`s}wgg_e^S`WMhcXf_TKB9CBQRMJHBRYT95/^ZUc\R]TJoslhcX\\Tnne^ZU^ZU^ZUclgt}wb[Q`XMhf]s|uhldjf\`XMxxm~NERUPVNDXQHtة{\TJhg_gaWXSIie[dmh\ZQcyyU^Ze`V`WLeaXqxqpxqmlc`YOfdZd\QSTNRNEoofje[`\Wcc[]UJmmvpa`XYPF\SIntm`\Wȱljac_U_XN`\W`\Wd^T[XPty`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\W`\Wa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]XTNEfaW{a]XQH>iuqc]Srxqgd[if\ja_VicYa]Xig]a]XjsYmmCB=jg]{~^TIkf\RI@gbXsa]Xa]XKHAeibgg_c`WyWOEqzta]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xfpk]jfn}zhwt`^U`_Wa]Xfzy|dg`xa]XYRIfiava]Xa]XRYUXND]VL[RGa]Xa^TYXPa]Xa]Xa]Xa]Xa]Xa]Xa]XgvsaYNfcZf`UVOFy{[XP
RJ@^mkfcY`WLe`Vqvor|vye^TeaXli`a]XwW\Wf^Sopgk{ykrlpxqovpjrlimf[TKWMC[RHa]Xummdd\QebY`YN}a]Xa]Xzsa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xa]Xb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^YWQHkvrd^TlkahaWb^YXTKfqle`Wsyre`Wlkb]xzcc[xwli`ji`b^Y}g{z\rrVa^bXLw}vNH?haVUMDjh^wb^Yb^Ydrnszjjbb^T~WOEs}xb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Ylyue}}qi{yZTK^\Tb^YW^Y^c]vr{}b^YXRIgibxb^Yfg_YPF\RGQZWz[RGb^Yt~͐o}b^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^YeaWki_TLCb^Yb^YTPH71*hf]`WMd_TotltyebYf_Te`VD@:l`kg|gaVd\Qob^Ynvp\TIlxsmvq^ZQWNDb^Ued\baXnmehf^`YNkg]kh^b^Yb^Ye^Sqsjee]}b^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yb^Yc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zguqlqjf`UŽUZUbe_[_Xgd[szsc^Torifjceg`oofc_Zpqigh`c_Zg{z]stbutkf\XRIe\QXSJoof{c_Zc_Ztœhjbkmea\RWNDu|c_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_ZYZSdqnyejcc_Zc_Zbrogmfxc_ZWRIeh`wWLA\QFٲ~d`Wc_Z[SILG?jtoQRKPMFc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc^T}QI@y{c_Zc_Zc_ZNLEji`aXNc]Snrjv{[YQz|d^T׼MJBQYVpqhpqhgbXKC:dd\^VLultncc[YQG\TIjmec_Z~iiaaYNpqhc_Zc_Zmkbt{tc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zc_Zd`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[haWg_ThrmQNGjh_t{tc]Rsyqdd\hleg`Vd`[gaWlogd`[d`[vd`[jf_ThaW[WNcXM\ZQsvnhdZ~d`[NMG~Ԉb^UmqiaZPXNDxd`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[f|{jzx_nlp~z|d`[d`[pqr}}WUMce]t~UK@}d`[fcZCGCj}}d`[UOF_]Tirld`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[prib\RaXMkh_d`[d`[d`[d`[d`[kldaYNc
\Rnphv{``XhcXc]Ryz~d`[IB:hcXxzd`[D>6lrkmqiu|krlimf[UKVMC\UJd`[bd]priikbaYOrtkf`Ud`[d`[ie\veaWd`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[d`[ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\KG?H@8txoPG>mzvb_Ulldszsc\Rx{]VMjqksvnea\w}vnrkea\f\c^_rqidZli_XVNbWK`aZv}uki_ea\bolea\ea\_YOosl_XNXNDzea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\`mip{ea\ea\Zb^tglfvea\_^VkwrQG=ea\x}UZT{udurZTJea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\f_Ua[Qe\Qea\ea\ea\ea\ea\mphbZOc\Rmnfv{ce]f^Sb\Qf^Smj`ea\ea\gaW}gcYVUNea\ZSI}a[Qmytluo^YPWND]UKUMCfaVhjbaZOg`Vopgea\ea\hcYhbXjg]ih_nrjea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\ea\fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]ltoZUK_VJ𒯭g`UXTLlytdbYd^Tnpgsyrc]S}fb]\UKlup4.'qxqhaWfb]fb]fb]bqoP[Xnne{}ZYRcXLeicy{f_Ufb]oIHB_WLqwo_WMXND}fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]ae^muo銯fb]fb]fb][fcqp~y{mWUMPH?fb]fb]fb]|Xkj]WNavvڛfb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]cZOaZOe]Rfb]fb]fb]fb]fb]oskbZOc\Qlnev|hkcqsjb[Qvzsqrifb]fb]hdZ;5.|KC:qNRMee]\SHltna_VXPFmrk[RGnyte_Tgh`b[PidZ~fb]fb]mlcjg^iiafb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]fb]gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^hnh{ōWa]ac\dkes~xb[Ppskrwod^Tgc^]VLmzuRWRvynlbgc^gc^gc^[lj@C?tyqaaYicYismz~hcYIMHsgc^o{]TJryr_WLYODgc^gc^gc^gc^gc^gc^gc^gc^gc^gc^[b]Yb^bf`{gc^gc^gc^frncg`q~GC<ܬɡgc^gc^jldlytohibNOIYZSgc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gaW`YN}kh^gc^gc^gc^gc^qw
ob[Pb[Qlldw|img~b[Pstlgc^gc^ljaQWSU^Zd\Rbf`gc^ee]aZPjqkef_ZRHSKBlrkgc^gaWnneb\Quzre^Sgc^gc^z~쀏nrka[Qgc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gc^gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_zswpgnh뀌mgpj\\Tb[Pqvnptle`Vgd_^YOHB:kf\w|@C?G@8bnjgd_gd_z|__WicYo}y{~opgUb`v{DB6]TIt{u^VKYODgd_gd_gd_gd_gd_gd_gd_gd_gd_gd_i|zgojevtegd_gd_gd_litol}{ܢ۔gd_gd_mrjǍalhjto^b\ovpgd_UQHgd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_qsk`XNz~v{tgd_gd_gd_gd_szsb\QbZPkkbv{kqklj`bZPicXyygd_gd_rvngd_;71UNEgd_kohknfw}iqkinh\VLVND\TJgd_jh_c]SidZugd_oqi^^Vuygd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_gd_he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`UTMgaWZWOy~ME;n{vXUMc]SsyrnphfbXhe``[RG?6yxWllMRM`e^Yigcje~pofrsjuxx~Zoothe`he`eus]SHu~w_VLZPEhe`he`he`he`he`he`he`he`he`he`zce]zMTPhe`he`LOIXYR\SIϜw{x~͘he`he`]aZWYSp~y|ntm]xzctqhe`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he``XMxrtlhe`he`he`he`u~xc\Rb[Pjjav{nuog`Ub[PicXidYqhe`~SJ@{ecYlwrlsm]XOWOE\TIswoc^ThbXVUNhe`c\Rwhe`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`he`ieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieasJD=YZSRLCp~z[YQe`Vu{tlldhe[?>9a_Vif\KEieaieajh_lxt|ieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieaieajfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbj
fbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfbjfb`]T^ZQfmgf`VYXPmzu`c\fbYv}vih_jh_]tucbZzsli_icXIDn^nmcg`z_WMy^YPlhdlhdlhdlhdlhdbic\a[ڛswlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdf_T`XMt}wf_T[XOsvnlhdlhdlhdlhd|e_Ub[Pih_vzt~ylj`b[PswnrsjRMDTMD~v{tqVOFbhbdd[daWlrkfib[TJWOE[TJe^SgdZb\R~UXSlhdpqhjja`[QlldlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdlhdmiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemieVTLuyqmzu\ZR`ZQmielmdvxeaWqvnr}wjmf\QFhsulmieNKCbicfmhxRJAomcЏje[Xb^pmie^nl`XNzaYOIB:]TImiemiemiemiemiemiemiemiemiezsFLIepk`^Up|wjkcs{tikcmiemiemiemiemiemieǚuyitpmiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiee^S`XMt|uje[[XPpqhmiemiemiemie~e`Vb[Pji`vztzrulb[PsumuzrVSKRI?srtkOUQfmfZWOfg_klduyhnhimf\ULWOEZRHmiegaWge\b\QrumTOGmiegaVoofrwomiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemiemienjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfhsoVOFҝkg]kg]o{][Slqipunnpgvxe`Vt{ta^UlrkidZbYO˲icYnjfPLDVPGjtopSLCuyqomdmjanjfaz|uD=5p^aZb[Pzb[PKE<]TJnjfnjfnjfnjfnjfnjfnjfnjf`gadqmx]VLhiaY\Vcpldha~pwqnjfnjfnjfnjfnjfnjfZ\Upvoۉ~njfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfe^S_WMt{umkb\YPnnenjfnjfnjfnjfe`Vb[Pji`wzv|{b[Qrul|XYRRJ@jwsnlcnjfelfVPFildt}wikbiqkioh\WMWPF[TJkg]ge\b\Ruzrone^aZnjfgaWoofnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfnjfokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgpyt\TJVTMaYNv|ts~ab[ze`Wp
skv~we`Vx{^YOmunw|uG?6okgqpgokgME^VKokgokgokgokgokgokgokgokgfmhiuqwwmrkokgksmZXPqysokgokgokgokgokgokgQVQih`qyr\[Sokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokge^S_WMs{uqri\ZQmmdokgokgokgokge`Vc[Qji`w{v}gaWb[QrtkZ\USLBSNF|mkbokgXZTTLBntmd`Virljpi]XOXPF_[Qae^nmdhf]b\Rli_ooflzvokgmkbopg}okgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgokgolholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholholh^ZQ\TJYQGsxprysjdZsvnmythoic]Sqvnu}ve`V{_XOoys{}PMEolhnofizxLC:RH>u`nkTOFyxx~v}4.(solholholh[\Ud_Uzd^SOIA_WLolholholholholholholholhp~zftpwyt~xolholhbkflpiolholholholholholholh̕blgolholholholholholholholholholholholholholholholholholholholholholholhe^T_XMt|utwo][Smlcolholholholhe`Vc\Qji`w{v~hbWb\QrtkXYSUOFPH>w}uomdolh\a[TLBr|ww`YOjupjoi^ZQXPFhleUNDqsjhf]b\RhbWsvnQKColhtxppskolholholholholholholholholholholholholholholholholholholholholholholholholhpmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmiǾ\WMhcXXSJsulz|hnhs~d^Ssxqt{tfaWNYW_YPr~xnlckh^oriNH?OI@RI?{WNCVRJqriqqhiqlepmipmipmipmi\\UfbXzd_UPKB`XNpmipmipmipmipmipmipmipmi}Ya]__Whjclpipmipmi[\Ub_UŐpmipmipmipmipmipmit}wiyvpmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmie^T_XMt|vv{s^]Tnlcpmipmipmipmie`Vc\Qjjaw{whbXb\Qrtkv{sUVPWSJOF=svnqqh~pmigtpVOF{mrk_WLkyuioh_[RXQG^ZQYVMsvnhg^c\Rg`Uz{YTLpmiz}txp|hf\vzpmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmipmiqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnj|rvnejdfqmdf^d_Ut{tsyrgb
XVgf`[Rt~idY>80~qumbkfXXQTLCYSIURJkf\ke[n{vmmdšqnj[tuqnjqnjqnj\]Vge[{faWRME`ZOqnjqnjqnjqnjqnjqnjqnjqnjZc_uef^qnjqnjuza^Uqnjqnjqnjqnjqnjqnjx}c^Tykxtqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnje^T`XMt}wv|t_^Vmlcqnjqnjqnjqnj~e`Vc\Rkjax|xhbXc\Rrtkv{sUVOXVNOF=qsjtwn|qnjTRJ\[Sgia^VKn~{hmf_\SYQG]YOad]tyrhg^c\RgaVdibqnj~qrive`Vnphqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjqnjrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokjupif\[XPkzva`Xe`Wu}vqvogdZoa^Tvoof\gcje[nslugsoXRIWOEkh_rskorjkf\tnmdrokrokrokrokrok\^Xjh_zgdZSOGa[QrokrokrokrokrokrokrokX\Virlݗrok_XNnqj_c]ؗőrokrokrokrokrokrokm~{u{fwtrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokroke^T`YNu~xuzr`aYnmdrokrokrokrok|e`Vc\Rkkbx}xhbXc\Rrtlv|tVWQYWPOG=qqhvzr{~rokomzvb_Ved\^VKqgkd`]TYQHXSJ]\Tu{shg^c]ShbXq|rokppgs~yd^Tkkcrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokrokroksplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplp}痷WTLp}^^V٠gcYvxptlhe\splc`Wxsnlcryr䁣\pp\ZRWOEmlbz{nphstkyidZgPa`splsplspl\`Ykkbyif]UQIc]Smphsplsplsplsplsplsplspl`d]r}watssplntmXWOݓsplsplsplsplsploungsomtnm{wsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsple_T`YNvzsumbc\nnesplsplsplsplze`Vd]Rllcy~whbXc\RsulWYSYXPPH?qriw{s{~spl{n|x`[RcaY_WLrfjc`^UYRHWRIVQHu{shg^c]ShcXXTKsplppgq|vd^TjjasplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplsplspltqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmmtmWVNxy
q~ce^ȳhf\wznqhjh_dzydcZzǠyzu}vce]XPFonelmehcY܉tqmktqmtqmtqmtqmmnfy}kjaVSLc^Tnqitqmtqmtqmtqmtqmtqmtqmlyumzur~yQUPevtKLFjldo~zѠtqmtqmtqmtqmtqmt}v`e_q|tqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqme^TaYOw}ppgdf_ophtqmtqmtqmtqmxe`Vd]SmmeywhbXc\RsvmX\VXVOPI@rtkvyq|tqmv_b\^YOb`W`XNfib`^VYRHYTKUNEtyqhf^c]ShbXWQHtqmpqhr|wd^Tjkbtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmtqmurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurn\UKdbYkpiYTKjg]膞uxpo}yiohjh_x|mnekjaT`^ff]{urnvzP`_Wmmjpj\_XZSIpqhYYQʄidZ=;6purnurnurns{toqiwzmmdVUMd`VnqjurnurnurnurnurnurnurnrfiaTWRgh_IKF~ɐurnurnurnurnurnc_VWYRyejcurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnf_TaZOyli`ejcqskurnurnurnurnv{e_Ud^Smnfzwc]RtwoX]WWUMPJAuyqsum~urnsXXQ]WMb_V`YNeib`^VYRHXSJUNEsvnhf]c^ThaW[YPx}qsjtze_Ulmeurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnurnvsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsoikc_ZQwmlc{}jtop}xdaWvsokkby}kkbmmdzgh`vsoy]onWRIp{v{]WMidZywXWOjf\Vecvsovsovsovsoqwpqunu~woqhXWPebXVYSvsovsovsovsovsovsovsotzKNITVOpxqvsoksmܐvsovsovsovsovsolnfk~msmkrlvsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsof`VaZP{idYgnhtxpvsovsovsovsou~we_Ud^Tnpg{wxxc]Stxpz|X]WVTLPJAyyqqhvsouUSLa^UaZOzfia`^VYRIYTKTMDprige\c^ThbW{~`b[zy{txpwfaXoskvsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsovsowtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpu|`]TԊidZekezotmwtpmney~
ji`ophVecikc[`[zkf[wTKAvu[VMie[poeXTLli^lj`li_ˀwtpwtpwtpwtpih_rxqt{tqumXYRgd[Y]Wwtpwtpwtpwtpwtpwtpwtplnun}tqxrnvoszԚwtpwtpwtpwtpr{twtp^^VwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtphdYb[Q}g`U{w~wwtpwtpwtpwtpszsd^Te_Uoqi{xqqic]RuzrppgW[UURJPJApofwtp{TRJ|a]TaZPeha`^UYRIYTKSMDmmdge[d_Ujf[v{sdibytwo|}ig^y~wtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpwtpxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqxtqu~ĖjdZoneac\{ii`nphz~ig^ptkyjmfhcYLD;mj`|li_NEQLCxwe\Qtwo}zwcmhPLDc`Wb^Ub[Qijbcd\]XOZTJ\[Sswpd_UmkbVSKad]}zwqtm_b[}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw}zw~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{xjngxy]YPkh^yy~{xgcZwzwyig]}b_Vu~MHAS_\qqhVPFv]YPficXSJmj`jf\EE?~{x~{x~{x~{x~{xfaWki`ed[rxpq}~{x~{x~{x~{x~{x~{xjngktnWecKJD`_Wjmfҋemh~{x~{x~{xs|~{x|}_aZdf^~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x]ZQqsjkkce^SgaV~{x~{x~{x~{x~{xhe\d^Skjax|lrkw}vgcZmkaKE=QKBpphf^Sh|zZ[TPMEb^Uc_Va[Phh`cd\\WNZTJ]]Uljad_Unne[\U̒gcYkup~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x~{x|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y_ZPjf\ce^~tyqs{uhf\x|v~xjh^|ycaXwmkbWYSnx~vZVN|y_\Saf_YTKlg]dbXli_Wca|y|y|y|ydg`gcZji_daXt{tu|y|y|y|y|y|yfhaluoc`Wge]SUO_\Tmrkp{|y|ydaX||yntm[ZRWVN|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|y|ybd]moff`Ukh^|y|y|y|ygdZd^Tllcziohlh^he[~KEOH?v{sppgkh_D?7VUNRMEnsk`ZPkmea[Q]]U^ZQ[UL]ZRfsoyzfd[fcYz{txpۃ~mlbpskfcYnrj~~~~~~~~~~~~~~~~~~~~~~~~~~a\Sotmisnr~ori}qtlnpg]pnjmeli_
JKDnmdu|ustkVTMy]YPfjd\XOonekohv{tЄ^XOxqumlkcptleaX„v{c_U_e`kupb`X|ǃntmgd[b]Tntm[[Tc`WgbWc^Tli_rumkkce_Tig^vxyieZc^TqumbXLMF=NG>OG>svnkg]hcYYfdTPGRMEhia`ZPlogb]S^^V]YP[VL^]UaqoopgebYgd[je[ZYQyyv{sqvoꄂnsk{a^Ujg]xptl}pskorikogonejmj`w~w\_Y^li_[Rhmg]XOli_mslsumzsyqmmdrxpebYޅwŅovofrnbc[caY}s|v_XMZZRq|wmslgaVeaX|txpih_e_Ukjax|u|icYd_UrxqbWK>81OG?NG>jeZoofrSNFRMEed[`ZPkngc^U_^V]XN[VM``YidYe`Whg^}v|tWSJie[ljadbYc_Vq{unne}kldqvn}nqiptllpiv{sV]Xli_xzdmh{a^Vjpj]YPpnepwqpqhu}vnogt|ugd[xb_Vn~_^UdbZpyshg_UTLPOHa]Tgiazljahe\jf[yzhe\e`Ulmdzr|vnmdfaW{}cYMC<5NG>NG>vzrdqmSOFSOHdaX`ZPkmeebY^]U\WN\WNbe^ooge`Vjjakh^jh`ntmjmez{z{oslrxp~npgrvnoytmrklkh^y}m|yU^[baYjql^\Stvmqztoof䇄r}xywzoqibjehf]_\Sy^WNx^[RccZwkpi`XM~_\TVTL凄qwpv{sjjali_ʠɇgcYfaVnph}ovpidYfbYf^SF@8OH?f_TvzrTRKTPHSPHc_V`ZPijbhg_^\T[VM\XOdjdopgd`Vqskie[hsnrvnkkb{懄ntn∅ăyyzsyr~mofsxppztntmkh^s~xxdd\jrm_^Vx~vs~xoof鈅hg^y~qumfpkhh_WTKz]WM]YPdc[YVMehaii`YVMYSJTPHc^ThcXmofs|uuxpRNEvzfaWfbXptlkqjje[he\mj`nlcIC:PI@jg]uyqSOFUQIQNFb^T`[Qhg^mqi]ZR[VM]ZRgqlhdZmmde`WuyqiuphdZ}mmdirmovoktob^U牆̊lh^҉ge\t{t~mnftzrccZovo͉li`w}eg`jsnabZ}~pqhge[{sxqhupjia扆XTKz]WM~\WMdd\VRIjldlneTLC]ZQTNEaZPrmkbqumguqidZJA8qwpe`VgdZsxphleyxjh^kf[pofKERMDkg^A=7mkbTPHURIkogb]Sa\RdaX\XO\WNaaZpif\fd[gdZje[v|tUQIie[txqvz~pofpofgcZvx~mmev~wb_Vpys׫ފyxy򑻿]YPimfjupchaqpgjh_uyqge[~wzo|lnfWQH|[TJs}xZTJee]zSTN_`Ypzt^VK_XM`[Rde^TNEde]v}y}yz}PFekd\[R^VLTMD][RPI@s~xhg_TSLhcYjjaw}vJ@7|]^WjqwofaWhe\syqmslnndgcZ~e]ROJBaga@<5uzqVUNZYQOLDecZa]Sgg^ecZ[WN]YQdg`|idZmlbfcYxzjh^[]W{rvmy}fe\ovopzunnehf]mkamqjjh_{}mme|dcZv}sxpdtq_podf^nzu`^Ut}vqpgpofpqhfhanqiu}vt|uS\YTLCyXNDoxrXPFa_WtLF>ejc`^V_XNWQHOG>⏍bbZyWUMxxnqiidZqskh~~E>6moge`Vjh_v~wjngje[ig]g_TSPH|~XVNelfvdaXb]Thh_jld[WN^\SgnhhdZdcZge\kg]if\yjg]nneu|uryrzed[`]UjwrxyabZooemkaՐt~xji`{}mne}ed[vz~nslUXR<70fibo|wa_W~blgpnehjbmlcɐyאqvnt{tvyR[XwWMCwXPF`]UtJD}ZXPidYrxprtlbwvJC:W^Zjjae`Vlkcy~gjcppgkjakg]kg\[\VA=6v{snlcli_w~wXVNZZRc`Wb^Tgf]v|\XO`_WmzuqsjebXih`kg]kg]ltnjg]svnqwpِjmfrztn~zb_Vhnhuzrnlcꑏ{kkb|}mnefe]wt_b[gmfp}ycc[ljafsoppfjmfljaszrsz
rhjbx|͑S\YuWLBWOF_[RtF@8bf_eg_]UKWMCWTLID6HB9SLCfg^hg^XTLVQHli_u~xv{suzropgv{fcYhe[rwpw~fjdv{snpgkh^lj`VZTVTMmlcw|utzƖ\]VdaXcaXecZb_Vbc[sHD=nnemkafcZqxql{ww|daXYXQqysu{swpsl{oqijng|LKEh}{{}qqhDHEfmgs{s|z}w{sxprjopgVVO]YPkkcy~|otm[UL_aZTJ@^d^RH>ejcmyul~[UK[YQYWOIA8baX}[XOfcZrul|az{jf\oslfbXig^u}vs|vtzrmkaqunx~vSQIZ[Tkg]svmnogdbZdaXcaXdbYZXPfjdtkh_hf]}fibmlcv}v}`d]u{ŘxxľXRIqum{orjkoh|ONGqstk[^Wiqlsa`Xu}twozrumpqhvp}xmog|~qyrTJ@[[SnTJ@g~}UKAQG=ab[jtobe]VKAUK@QG=@91QI@LMGs{tZVMge\DŽ~klcfbXkkcy~ovphe[v}vXXQ]b\li_llchoidaXdbYdbYfe\^]VkupDA:jf]c`XkkbmlcΘ\YPjg]ki`s|udjdwәz|vqvn{orklqj}Za]yuxoXVNlvqtaaXwsvmnlb|sxpqsj\WNoslzt|v^^VWUM~cc[OE;\[S~hrmWOEhngIA8m`f`ivsED>TRKd_Ua]TZULhdYlkajf\CEAhg^gcYnph}mslkh^ki_zqrinmd]`Z_gbppfpqhpqi__XkogecZed\ounbd\uyntmprieaXmqikh^kh^kupoofw`b[wߚ{]\Srwo|psklrk}gyw~w{sZYRmytude]wsumnlb~u{sqtktᚘrxqov{PG=SOGyVOEvtMD;XTKvivr_aYWNDTJ?q}QMEUTLOIAGA9TQIkff]fcZXTL[XOie\kmfjf[{[hezgd[he[qvo{kqjnlcjiamkaXZTbjduzs\[Rff]ed[fe\dcZgmfmmdki_fd[ryrkh^luossvnxzed[hqkv~盙|\[Srxq|ptllrl}usxx\^Wo}yugjbxsvmppfw~wrulu~x~axyxRI@KF>m~{TKBZXP@92RLCn|yn~WPGagb>7/gojMH@PH>agaUZTUPGqNIAcaX|[WN`b[ie[rxqw}uy|HE>punfbYih_u}vv{jpjkh^npgvzr_f``gayzz}dg`ZXPecZed[ec[\]Vo{wpqsjhg^v}]ZQlkbnphqxr}o~zu{뛚|\ZRsyr{qummtm}ނ{|_c\q~uiohwtxpcrtkyzswoOI@x~}oxr{̛VSKcniSKAZVNKD;dmhvpchaTJ?C<4RH>]c]hpjac\yaf`KG@UNEufqmexwvy[ULgpkruly~oneyzMJCA<5lmefbXllcyrztwzpqhqvnmi_twndnihtpkh^tyrmog󛚙_^Vb`WdcZecZfe]cf`{QRKjg^jg^lmev|uyhoikh^ki_ts}xSSM}\[Ssyr{qvn]b\nun~lpi}bhcstmuovv{sitxo{~sxprLMH|zeg_}ޜGB;XTKZ`ZRKAUNE:72o~zWZUxcokTLBZ\Uad]B=6XVN3-&ZSJtWNDipjXWOIB:UMCyfrnd`W`\S]YPonejf\MKDHC;ih_fcYoqi}pwpjh_|u}vmj`qkh_qumt]\Ted\ed[qztjsnitoONGknglkbdbXptmlj`nndmvpvmmdqvoxqztX]W|\ZRszs{qvnbidnvo~~ouo~emhtrozuuxwkw|t}v}v靜pxryhkcCIFRLCMOIwGE>QI@RKBgXYRcxwZc_=6/RLD^gbOH??80JB8b}]YOu[UKp{v][RSKBSSLVMC|TNEpdcZfcZ[VMcd\ebXŋJDmka<<7LG>s|ugdZig^t{t{qyshe\x|mj`li_ki`^b[^\T\ZRfg^hiaee\lzvFIEVXRSTNjh_ki_ii`zsvnoztljahf]ecZӡyvvrul~kqj_b[vx{syr[[SpztnmdrztuzrÊpvbd\|dkeq}xopgPJBm{wv{^b\zKC::4-RQIW]WRWQT\X83,:60uYODhjbyp{u
hkdhkcmsmt|<:4nOOIZRG⢠YQHWTLfqmyed[]YP~ge\f`Ujf\xwPMF^khlogfcYkkcx}v}q{umlbnne~mlbw}v|\[T][SYWPff]ee]jnf~QPIs|ujh^kh_mpiac\kj`ki_~vyzmmd•|~nuo`d^vy{syrZZSq{uonerztuypČrvfjc}bgauprjkzvˣz`aZ~ģMF=:60E?7^stup}yt{yq}xmvpnvpq{vw~50*lkevtVVOQQKu\TJLNH_[S_aZ`d^pumdf_kh^qtlkh^HICVXRaz{ih`gdZnqi}s}ws{lj`nph࣡~\[S][SdbYgh`ee]wUUNhh`jh^dbZszsljaz}^\Slkb__Wp{}yyowpaf`wz|tzsZZRq{voofrzsuxpƌttiqk}`e_xqumfibfkdأgtpJB:fvt94.ttuuuwz|ڣ\kiHB:QSMTcbfyxWSK_YOBC>IE=fiamyt]^WBC?ijad_UUPHq}x63-jh^s{tHF@jg^[gcqyge[hf]rxqr|vli`ryqomdvzrljagf]nne]\T\[Sgh`ikcff^}SSLUWQa_Woofii`zqrjzklddcZgso}~uzrowqbicwz|szsZZRq|wppg82+loguxpǍtrmwr|`e_hkdsxpff_mwrIE>{KC:clhQSM=6/EA:C=5LLF_pnUb_furMG>E@8=6/RLDcaXTb`X`\p|vr~x|]\U~?AEFA^YOjmefha^^VxpjldfibfcZlj`NUQli`rHPLijbgdZmpg|u{u}ih_oof~svmnmdv}vZYQ]\Tdc[ikdff^{UUOWZTZ[Thh_lkbcaYs{ulkaeibnxrlj`{~VZTxrtloneee]dmhw{}t{tYYRr~x~ppgiiavzrɍug~twcjdbbYwxpxrzbgaJC:YYRE<4KD<<6/WTKUNDa^UgpkS`]^\Sff]r|va^UabZrPVRmsljkb`[Rt~@>9fcZrtloofWb_}ge\hf]qwpt~yxxxnqi\[T\[Sfg_ilehkcuVVONQLhiab`Xqskijb{ppgz}p~z|oqhmtmbd\nvpq|wrtlgibenjw|}s{tXXQr~ypphhi`w{sʍt|xseojde]y|wVZTħYWNB<4TQITRKH@8ahbUOFUNE[YQfpl[qqWPF^VL|ƧA:2`[Rcf_o}xקNMFEA:ecZeaX[YQLOJmmdu|upvokh^rUa_ntmfcZjjavy{u|mkaszsyymkbfd[lkb]\UXXPgiaef^TTM_ie^_Xounv|uwxnskki_mnekja``Xlrkhkc~x|z{imffqlw|}t{tXXQr~yppghh`w|uˌt[\T{m|xivrgjc|}hso䨧TNEPJBk{xkΨKD;GD=[]VTNETNEYWObid`_WVPGc`W@<5B<4a\SflenytNMFNJBKF>=93b_V}hmgRZWjg^xLH@li`csqikbgd[mph|v~xji`lj`ynkbmlcprilkcYXQ\\Timekohfh`OOH_farWXQfe]llckjau~xmmdqul{kj`mwrڨ{x|lme恐jpihtow|}t|ugg_r~z~opghh_xwˊq`d]}itpm~{johĐ~bc[sC?8n}yKE=`gb;4,j|zMF=JD;YZSTNESLCTQIXZT[gdXQHc`WqysA=7a^TipjnytMIAOKBOKCKHA]a[sztxOYWQ][mlchzxlj`txpdgkcge]hf]qwpu|{jjbpripqhrtkZZSZZSfh`fh`X[Ubf_b`Xprijld~nnez~syrgf]q{vp}yxv~w傑lsmivrw|~t|ugh`rz~ppghh_z{̉nelgenismtnǑjpi|\d_]_X@;4WXRQRKUUNLD7ZSIZULb^UȪnxrdbYksmp|wMH@RMESPHRQJLMGjldfbYZYRWfdlmemlc٩qsjmslfcZjjbvz{wtyqotm\\Uykpifh`RSMj|z^b\kmetyrz}qwpsxp{~kj`mlcu}v}ioinlvqsqvnz|z|mvpjwsw|t|viiarz~oofhh_|̈ijuq~ckfyoysɑs{z﫪YXPn|LJCetqA91D@9KDYgd\ljFIEYSJWQH_XNu|>;5MJCOLEJG@jm
fikdktnr|OLDUSKWWPXYSVZUGLHb_Vmphjsm᫪gf]v}unmdhUXSijbfcZnqi|xzmkbvyrtl}CA:lkammdhg_UVO[\Tfg`gjb[c^lwstec[mmeee]yljajh_oytprjhlfymytlnytjnqiopgu{tnwrkyvw}t|vijbrz}uyqhh`̅b{|n~{|dmgr}xyyˑ[]V|qKG@XUNtRWSU_\@91IF>A;4r}\WNWRI`\RpwpKG@PKCNJBEA:b_Vs}wmvpwTTMZ[T]`Z^d^^fa[fbae_gg_}NMFqsjw[iglja{fe\hg^ryrwig^oof|oneGE?_d^mkbllcYZSfibkqjgib[d_ac\[\Urysb`Wopgmqjpri}nphlkb~r}ab[g}|s[qr֕svnoytm|yw|t}vijarz}ɬhh`΄sxgqms|qrjΒbe_ŁɁJF?TQIclgbe^ZVM_^VWRI^YOqztU]YY_YX[UQSLa\Snxs~GJEZ_Y`f`ckfeokgtpfwu_qpc`WkmeWXQNLFQSMki`ronexzrwolqjfcZkkcw}|xknfswo{UUNpqhZ\Ueg_fjbvf{zdjdOVRgf^mmduyki_mmdnof_`Y^`Xxqskozun~{w|t}wijbr{};93hh`͂RRKxqkyušupqhГnzv֭_ttoPNFY^Xbkebicbga]]VVRI`_WdcZ{孭bsqP[Yee]ijbxr~zW_[cnjhuqkzvn}ppjRddebYgd[mzu@?9UXRqvolj`oofNNHhiafd[ork~x}mkbt|upqh;:5[]Vophpqhmlcprjq}yltninghoi}dbYllcile~nofophbe^nnfnpgfg`qѮpriozuo~w|t}vjkcr||INJgh_̀X\V{ixupȚwqriӓUUMzƒgrnZWOZWOaaYp{v>:3JF>WXQltoa^T~s}yewunsvxvrkcyycf_c`WTYT|hg_óophr~yfe\hh_t|uxlmeqrj{nmdnlc\_XZZSnofqskkjaW[Ufhaehacf^`c\\hejkcki`kjaqysqsjllcmnfszsÝڢ۟ޯ~opgozupv|t}wr||Sbagh`~^hd|dokxxqrj֔clg꯮^jfiOOIKJCIHBNRMVec;71961QXTC>7?<6jqk`\Sv}xwelppqrNOIROHpvonsl_y{dzyylkbnphjmfecZkmey{|lmenmc[\TVWPpphuyqbe]lup~reke]hded[lkboqizxxswpeibnnebaY݁Ⱟpsknzuqv{t}wr||Űgh`|etqzbmizqsjٔr}_f`l[`ZTRKVWPUVPVYSX]WY^YY^Xcid`_VcbYzzxa{|atsHPM?@anjMG?pv|>;4HE>NMFTTNZ`[Y`[[_ZXXQSQJMKEVSKmqj󲱲?C?jldᲱlkbQSMgiafd[nrj}zkldtyqqsj\^WY]Xqsknneisnfkenab[woundc[npglrloqhmmekkbvzisnz`bZuce^irmtuzt~xq}zųgh`w}up^lisô}rtl䳲__WLF>XZTSNFOIAHC;GF?92,NPK72,_ieSPHSOGUSKVVNXXQYZSZ\UZ]WZ]W[]WZ[TYWPWUMWVN]e`a|~jldfd[zAFBFE?B@:opgAAIA9QJAfojcplRPHjvrelfdicelfitou˵@A=XUMijb鵴hf^qskv}ulkaee]fe]qwp{mnfpphnmd`hc[c_pripqhFGAelgff]a`Xmmewmmddg`llc{a}ǛՌt{tde^yzrxpws}wu{p|yĵgh`qsjswohŭ〙rum┵a_Wr}ZZSXUM^`YSLBY\VC=5XXPX`\nslhg^==7DE?KURu{tnnehf]dcZjlcw~}صw~wz~~amiy{·_b\qyrff]kjau|uv}v[[Tmldnnfңv~fjcy|psjxr{uu{o|yögh`svnūހsvnᕵx_idp~zSLBOMF@:2QNFaaYdbXu~w=:4mmdX[Uqskrvnab[ed\otm{vxqsj}eurprjprjjjaWXRff]cc[llcvmnece^kkcnog]d_{l|yi~ab[xqztv}o|xgh`rulŨفrvnn{v]XNͷZUL޷WRIhoin[\UYVNhh`?>9GE>bz{kkcopgr{
uQQJ{cbYijbuzRTNoskrumonefrn[c^nnepwpff^llc|kupZZSmmdpskRUOGHCxpysv~n~{xgh`qrjCHDťҁswo[UKbpm\YQQI@VOFVOE]c]WRJD@9xotmhg_P\ZJICGE?nslRSMlkbhf]dc[mrk}|OQKSVPxyu|wxm|nof큑|ff]ee]kkbwnwpmmdabZlmd{{qOVRu|tu{sypxqwm}zxgh`չ>=8Ƣ̂rvn޹PI@RJAչVPG\\TVb_VQH^]TdbYzLRNmmeSRKmlcv~w]\Tgh`t~xX]Xkkbpqhqsjv|tdplMRNpskmmdynslgh`mnfdhajkcptlzpoknfxnvox~l|ywfh`Ӻ:60ƞŃswobhbRKBYTKYUL[WNiyvTMDZXPijbɺvzmmdee]POHuzsbaYlpi|}bsqezysztpqhSUOz}fe\df^kkbzwmmd__Xorjlmejldsxqxmtmy}j{xvgia94-ƛswo]XOdd\sozu[WOVPGPTOWTLjqkknfgf]ntlONGmmdzfg_s}w绻tzsopgzophllcii`koghjblme`bZnrjZki𗹸vxwkqky|iyvvgjb84.Ɩsxp\WMZSIOMEWPGlxt\[R^]UyUNE][SdcZz~QPImmdt{t\\Tkpi{}jrmphoofqsjpqhopgv|t{ff]de]kkbmne]^Wqwo~悕u|uwing{{fwtuv~xgkc73-\vxērwoRME[VMʽYXP]`YXSJWQHirm]`YZ[TkogvzllccbYNOHwyef^s}wͽz~nogmmdtzsIHBhiaimekkbnogvx^^Wqwpllc׽uxpskugke|{csqtszrgjbYpqÐswpWQGxhlePJAxJF>^^V[XOubbZgg^hh`qxqOOHmldnqikpj{~kkbswoqtlsxqPOIqysgh`bd\llcIQNophnph\`Zlldt{tqxqu}vteh`~y^mktfhaSggsxp\WNPLDQKClxsYTLZWOjuo`e^__Wed[پSUOtzrRUOmmeorjbc\s~x}uy{~pqhQPHr}xff]imfnqipsjklc]_Xlnfqvnbg`sac\~ywseg`~鿿sxp\VMXRIOKCWdbTMDfqlGC<^_XWRJlVVOs}kohjld^c]jkcmmdIKFkqj|ǿnnesyrptl|QNG\ifkohjngmphlldpumYZSkme⿿ad]}p_aZ|yrehasyq™swpgxv[VLVQHaaYSVQOJBqXUL`c]YWOq^]Uee\~guqTSLtzU_[oqiiianqis{}t|vnoftxpz~Šu{tSPHcrpu|gg_gkcu}vMQLsyr\a[ptmoslhrmm{wpkupfke~gzxrvoqUVPcmiZULMIAtJF?ZXP^a[[]VOKCagahiaXXQMLFmmenofmmejrl~}Æ{rwoqvnt~xWWOgxv``Xgh_lrlqun?@;sxqsyr_d_jkbt}wĂj|zxwnfkdhpk}}RTNrvoyjtogjbWRIbd\nMJBTQIuBA;\^W[_Y\_YD@9TWQ`_WhiahiaZZSVVOlohmneXkjqumllcu~qxqnofpqinofāoqhȭhzx\a[j}{JJDlpikpidgaklctrwpptmilefvsvwmhlf{emg}|RUOqumX[Uŀ]XOTOF[YPp{WklLHAGB;KHAx[_XZ`ZX\V:83NYWS\Xelfff]XYRXWPSUO}ajeqvnoskirm{|󆚖qunsyqtycpmblhl~MMGtzgg_iohqwoxwzx|RTNlogơߝƺ`mjs~wjjoh\b]|TYTqtlVYRöaaY_^WWSK`b[HF?k}zIHBtX\VZc_PRLbb[~hh`VWQXWPY\UpvnrwoX[TxyqvorwonofophƕoqhQVQgvslOOHdd\fg_owq}t|uvyptlTVOpwpknfǗЃw|Yd`yp~ywffibZ`ZkwrcnjU\WptlW[Uõx^ZRTPHZYQfni>;4KG?JGAGHBQSMV_[FIDgnhgiaSTNXWP[_X[b^x\`[orj|{}u~y|sxqt|ui|zi}{PPIHICileowqflelnf`idx|mphknfryrȶu{zQYUlxtx_yzȍX]WlxseqmT[WqvnX]Xôo}y]ZQYULVSK]_XwA=7972FD>
DE@B?9DJFce]r}whjbKLFUUN\a[ajfmnfLRNYZSnofqvo{yryrt{soqhnphȣrxpcwvcspQPJNPJnuojngjpjt|vblhv~xӗyw`f`CGChrmy}ɃUYSmxtWXRQYUrwoY`[IJ`b[^[ROMFVVN^d^OTOAA;=>:jsngibRSM]c]dqmqtynpgXZSmofs{tsɁ~{vxu}wYb^QQJSXSxfg_nxrmpi^eamphkmekmet~x^b\}}bic{{rwpmyty}\b]NUQqumZb]ð[ml^]Ubc\ONGWXR`ieKXVbd\]hdhjbMOJ]e`i{xx~ae^YYRmnfmogkv|sxpnpgnofQYVmnfMPKQSMZd`ff^s{ykmeW\Wnqirv~jngW[UtX\V{}xˣmxt^gcHMIZc_¯˄][Sbd]JJDTWR`oln|xhkd]hdpqzsmnfUVOt{t}؂|ˉ~˓mnevymnfSXRaqoThhgibt}jupryrQTNTVPqs~x]xzmphfvv̡mxs󖹸SWR_jfDFAzZd`[ZR^\T\[T^_Wp~AFBMZX[ifLOJabZgia[kjvkldY[TnphmogvFHCLPKRYUWb_P[Y}ophu|uwyq|kmeys|Wa]f}|YqrimfsykupxKLGY_Ymphkpilrk͊{~ps͝lwr񇞛OPJ_kgSUOt|uZeaLYXBB<__W^`Y[\U[]V[^XY]XW\VRWRINJalhVZTMMGo|wo}johr|vmogUWPt|uߺokldqunCD?O``hjbmogYjifkpioxrmwrlphjld`miyhjbildΜvhxvp|ΓkvqqtlQTN]jfUWQnrkXeb\ZRGID]]Ubf_dkfbjednjdmi]b]XZTRRKNNH?B>lvqkqjhjbJOKEFA^d_nphnrkd_mj[d_GID~u|tqum^_Wx}ryskrklslnzvjldV\W{fhaimfπW[V׭Zfbx}luρjtoʈ{SZUZgdVYRmqj}Webm}zED>KOJ[ZS[[SYYRWVOUTM]hdbwvbd\oeg_W]Xymog[a\lnfryrKOKORMSTMSSLVWQV]Xzx|w{nphdjdmqjvkrljpio}ynslyw~hleq|wcf_LNIކjvc~fpovoisn˅zT]YWc`X[UmrjzUdb\\TKQMJKEGFADFAp~ysingXb^Y^XqwpQVReusnqi}EMKVb_^mjgxvntjlphЍkzwpysmphjqkjqjp{pxq{w}q|wсkohіgqlx|R\YQ\YY]WmqjyvSedRRLmzvLRMKKEKKELNIMRNLSOELIX_ZzimfgjcT^ZY_[Y\VNOIt}wfxvs{uild]ss_tt^srрpsky}bic{jldhngmwqq~msl{zҤz\uwztenhyNYVIROZ`Z^a[nskp҃\]VzO^\KKEJJDIKEBE@[a[|p}xdf_LRNU[VW[UWYSTUOnunv{cvtŕ]rqBC=zqvomogbicmqjlphVc`dhauthkco|t{luogleEIEcjdbjelphnZa]Ӧg|^aZ_b[eqmmxs~kupehaHJESUOUWQVXQVXRsyotmr{uovozӑ`fanuneroac\{lqjLOIqzs[]Vr|tŇ\ste`d]^e`сZc_Ԥԏufsokuqcic`d^hpkekemxsdg`EF@PQKTUOWYSY_YԀpwqs|vhkcEHCPZWmrklptlt}w|qyr`hc`b[x~QTOoyslrlq~՛ԖksmO^]gls}xY^XdwuYc`աՔmzv`rpm~{^d^tkxteibdibLRNTZVYa\^ifdusՃv}nunkogNRLXa^autGMIHPMzpvnu{yovoWZTx}ksmszVgf}iqkUYTք_xzl֟QUPi{zU`^֛֕fpkSVPjzwbjdo{|s}wy{JJENNHPRLRUPQVQJOKzu~xUYTuy{xs~x]d`yr}?A=fkeu׎{imוFIDh{ynslלѰ]eakTVOUiiLURBD@=>:m|xlrkmwqpwqbutMOIJLF>@;u}}TXSmqjqzsyszrzmyuisnq݆novhmgewumsl؜ѭOWTVXQizxixuglfu}wpyst|}Zc_lqiqztowqUYSq^kgaf`ojyv}_vw`pn~ٗȧteok\_Y\^W]`Yirlpztfibhyvjngv}]jguz}dibYopJLGlPTOWfdڇڋ
dib]_Ybgacg`p}yJOKn}ywpCIEJLGLMGLNHFHDemg{y^qp|snzujtngmg۠p~bkfxpPZWasrnkhywgsobhbfhaszXZT{ܫ팬܉xnX\VXZTZ]W_d^gnh{hng{xgni݊n{vn|jvq~hxvbf_yތmRelease_v0.3/kernels/compiler_julia_ref.bmp000066400000000000000000006000661223142177000212100ustar00rootroot00000000000000BM66(                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""################################################################################################################################################################################################################################################################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' (!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!(!)")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")")"*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$
*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$*$+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%fe]s~+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%+%,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&ӯ,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&,&-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'vy-'u-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'-'.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.
(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(Ǥۗ.(.(.(aYO.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(.(/) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) nrjaWLqsj^^W/) ݈mmd/) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) /) 
0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!񌺾[QFec[0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!0*!1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"bZO[XOڠ^XNÃjnphhtp1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"1*"2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#ikd}޼2+#2+#2+#_UIbWK{vf_U𙺺2+#ۥ2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2
+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#2+#3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$t~w3,$3,$˝3,$3,$VLBrsj|3,$ӣtyqbWK3,$]SHNE<3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%`VKwy3-%^d_3-%3-%3-%`UIr}w3-%3-%v_THomd3-%d]Rwyϸ3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%[PE`VKe]Sf_UbXL^SH`UJhh_c]SgcYd]R^RF3-%3-%3-%3-%3-%3-%3-%ZQF]TI]TI^UJ]SG`YN3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%3-%4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&uzs4.&4.&4.&c[P4.&jg^߹4.&4.&4.
&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&]SGXPFmzvx4.&oneaVJgaWf`VbWL`THXQH_[Qosl}v}vig]faV_SH^SHig]ptlfaW^SG]RF4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&4.&5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'u{t5/'5/'5/'5/'5/'TJ?aWK\QF5/'5/'5/'5/'d]SƋ5/'ie[ݧ5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'`XM5/'faWZPFZODZODpphmlc5/'5/'5/'rtlaVKcicgaWd\Qd[P5/'5/'5/'5/'5/'5/'5/'5/'uzsjg]d^S5/'|gbXaVK5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'5/'60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(r{uՍ60(60(60(60(60(`VKhh`e[Ph`U60(60(60(bWKyy60(ppg60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(Ԃ`XNsxq{jg^bZP_XMli`caXUKAf]Rf]RkqkuzshbXd[Ot|otmhe\kjajg^bYN60(60(60(ie[d]R60(ie\_SG60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(60(71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)7
1)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)nof71)71)71)71)71)߷bVJ}|}71)71)71)ibW׍71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)`ZPzUOF{71)71)71)71)71)71)71)71)gcXkg]nqiTKAjcXbVJcd\mkbie[prjjmfildgiax{jg]f_T[QFUOEZOEhaVcYN71)ie[`UI71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)71)82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*rul82*82*82*82*82*82*82*rskȊ82*82*82*82*lh^e\Q82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*oxr82*{82*82*82*82*82*82*82*82*82*qzt{c\QnlbaYOr~ynkabVJooehcYnne}u~xpumnrjkh_e^S\QEhf]kogli`]WM82*ie[`UI82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*82*93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+aWKhbW93+ZSJꖿ93+93+93+Gbfw{s93+93+93+93+cZOf^R93+cYNXNCo93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+b^T93+93+93+93+93+93+93+93+93+q{uw|sZQGfe\ntnwhdZmja_UJRMEnkabVJ93+je[bWKprj93+錦x|nqipqid[P^SHkrl~cXLli_\SHqrigaVki`93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+93+:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:
4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,ig]:4,v}:4,:4,^]UKC:hh`d[Oy~:4,:4,:4,:4,cYMlf\obWLqskp:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,wr|wHF?:4,:4,:4,:4,:4,:4,:4,:4,c^TecZy~:4,:4,ie[mja_UI:4,nkadZNRI?h`UbVJz}:4,x|nqiki_d[P]RFe_Uu|dZOe[P{}bWKd]R:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,:4,;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-冘YQG;5-v|ob_UYSIbXMx}dZNe[O;5-;5-;5-;5-gaVpndgaVoneo|x;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-ufaWd^Tif\ie\faVnneaVKhd[d\Q;5-;5-;5-TMCb`Vz;5-;5-]WNr|wjg^nlcaWL__Xf]Qw{sz|dZNkf\Σ֞ːxu|uv{sppg^SHaXM]YPkf\dZN^UIbWKfaW;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;5-;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.bWKh`U;6.;6.\TIfe\^WMcXM]okajcX;6.;6.;6.;6.lj`ćmjayy;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.ih_˚;6.;6.;6.;6.idZcXMrvnkh_WZSovo;6.;6.[TKlqj^XOs{if\mi_c[PcYM;6.;6.kg];6.`UJ;6.;6.;6.;6.;6.;6.{qqh_UJ`VJa`XcWKicX`WL~jeZe^T;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6.;6
.;6.;6.;6.;6.;6.<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/unsmuy<7/<7/<7/gd[]UJoung_TStx<7/<7/<7/haV٘nlbƬ<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/aZPhe[kh_svnqrjhdZnne`UI<7/<7/<7/nmc<7/bVKv<7/<7/<7/_]TrzZSImslǡd^ScWKif\olbcXK<7/<7/nlbdYMmka<7/<7/<7/<7/<7/<7/tvn`VK`UJg^Sqpgji_rtkhaVsvn<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/<7/=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80svm=80XMB=80=80^UJii`^UJ=80Ћ=80=80=80h_Tkf[=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80{wx`XMb[PaYN`WM^UJkg]aVJ=80nzv=80lh^e]Rf^S=80=80=80cf_ac[imfĸ=80=80twn_SHwye[Oyx=80g_Skf[=80=80=80=80=80=80=80mkaaVKaWKyxlg]uymnemkaf_S=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80=80>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91cZNcWKم>91>91>91faXd_Uig^hg^MC:~ݰ>91>91>91keZ{zmi_>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91d^T>91lkbߧ۰w~wc[PqriRH>XMBYNConee\Qg`UEB;YXQMJCp>91>91>91>91>91rtkrsjcYN{|dZN>91>91kf\>91bVJ>91>91>91>91>91>91>91w|tg_T`VJcYNnkag_Ty}npgpoecWK>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91
>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91>91?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92swotvmw|s?92YNC?92?92]QFkja^TIx{dZN{mf[f^SҲ?92?92lg\珥uxo¸?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92c\Rٰv?92?92?92?92?92WMCXQHwx`XMli_rumed[ebYmkboofcYMYNC_VKikcYQG_XN?92?92?92?92?92?92?92ie\ke[jg]eZMmh^?92?92jeZjcXrsk?92?92?92?92?92?92?92omddYM`UJf_TpofdXL{osle[O?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92?92@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3qztx|@:3@:3@:3bZOlkbaXMdYNr}xtԒϽ@:3f\P֦툙@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3ȑ@:3@:3@:3@:3^VLhg^oumuywo}yf`Vlj`qsjt|nkbdbZd[O]SH]QFaXMmqj]UK`XM@:3@:3@:3@:3@:3qqgaVKf^R@:3@:3mka@:3e^S@:3@:3@:3@:3@:3@:3@:3@:3qqhgcYaUJrsjv~wvyqe[Okh^@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3@:3A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4fcY}A;4A;4A;4A;4hd[gcYie[i`Uuwn޼A;4g\PA;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4ǩޖA;4A;4^VLkmev{hdZmlbc_Ursjmslli_bZO^SG_TIgd[ed[^UJeaWA;4A;4A;4A;4ie[g^RhbXlg\qpfA;4A;4lg]g^SsvmA;4A;4A;4A;4A;4A;4A;4A;4zzd]RaVJrriib
WA;4rwomjadZNA;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4A;4B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5e]RdZNeZNd_VB<5B<5B<5^SGmlcbXMtyqf]Rx{sB<5B<5ةibV۟ٻB<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5{ǰxЁB<5B<5hg_v{ȤשB<5faWtxpge[󈜗dYMhbWilde`U_UI_SHf`Votm_VKbZPB<5B<5B<5B<5nlbaWKjcXB<5B<5mj`B<5f_TB<5B<5B<5B<5B<5B<5B<5B<5B<5twncXLbWLc[Pnkalh^B<5qwodYMd[PB<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5B<5C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6w|sw{sC=6C=6C=6C=6bYNnofaWKC=6w~eZNߤC=6C=6өf\O`VKC=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6[VLC=6[SHpvoЫC=6C=6QPIakfaYOf^SpskC=6mi_g_SrxqgcZ`VK`THf`Uqvo`XMbZOC=6C=6C=6idZmj_icYe[OuvnC=6C=6f[OrriuxoC=6C=6C=6C=6C=6C=6C=6C=6sskjh^aVJf\P_VLpofdYMC=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6C=6D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7^TID>7D>7D>7D>7hbXlkae\QD>7tvmg]QĢےD>7D>7ǪcZND>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7u}vD>7جa]Sv{˩j|yօD>7D>7PMFdmhx~vrtlOG>YQGibWx~vooev~wie[aWK`THgbXqwobYOc[PD>7D>7D>7uvnbWLtulh^SD>7D>7mh^D>7e]QD>7D>7D>7D>7D>7D>7
D>7D>7D>7x}ud]RbXLqqge[O{x}zytuloskD>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D>7D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8tzsy~vd^TD?8D?8D?8b[PljagbWkg]D?8|{uvm֣͠D?8D?8olbD?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8|źcaXxԮrn|D?8D?8D?8\RGa\SecZfg^xyv{sc\QbbZppg]VLe[OwxhdZaVJaVJjf\qumaXMd\QD?8D?8gbWaWLli_lf[D?8D?8D?8mh^tul|~D?8D?8D?8D?8D?8D?8D?8D?8D?8mj`stkbWLhdZf\PD?8zzdZNnlcD?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8D?8E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9f^Sd[P~E@9E@9E@9aVJoofdZOw|tE@9Ⱦ՞ѫE@9E@9|zE@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9[[Sif\qtlu{sw~wv|urumlj`bXLc[PE@9WNDa]ThialpipvoE@9E@9c]RdZOophmtnkf[cf_omcuzsgaV`UIcYMmmdmneaXLgaWE@9olbd[Pvwnf\OE@9E@9omch`Tg_SE@9E@9E@9E@9E@9E@9E@9E@9E@9stkgaVbWKnkae[ObZO~lg\]SHE@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9E@9FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:pmcokaFA:FA:FA:e\QqricYMFA:^TIidZFA:qoeܜʭFA:FA:|zFA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:w{FA:wnogv~wpsjmneoqipsjsxprvnu{t{{hcXbZOoztx~ppgaWLxpofp{uibWfibqskd\Q`UIe^SrulhcZbXMlkbFA:rqhbWKsvmFA:F
A:FA:ssjnkbFA:FA:FA:FA:FA:FA:FA:FA:FA:FA:ondqpfcYMf_Tzx]TI^TI}f]Quyq|FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:FA:GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;_VK_VKGB;GB;GB;_YOidYonee\QGB;cZOGB;ʩߙîGB;GB;ۺGB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;if]ji`u{tʬڞŒ}xzlj`e^TGB;GB;GB;GB;he[e[Pki`dYMtvmx~vhngmkabXMbWKje[tyqc[Pd\QGB;suljcWibWGB;GB;GB;GB;iaUpmcw{rGB;GB;GB;GB;GB;GB;GB;GB;GB;mh^hcYbXLibWb[PGB;rriuwny}GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;GB;HCJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>ibWf\Qh_SJE>JE>JE>e[Pstke[PywJE>x~vJE>JE>yJE>JE>~johJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>kj`ې]a[gd[rvny|™vzoogmme[QFJE>JE>mi_c[OJE>omcJE>nkaJE>~je[bWKd[OonerumcZNe^SvzqjcWJE>JE>JE>JE>JE>y~vJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>omdoqhcYMኝe_Unj`keZ`YNu{sJE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>JE>KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?i`UKF?KF?KF?^TIhaVrsje[PKF?aVJKF?KF?KF?KF?}KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?hg^~llçKF?RKCbe^q}x}ŸKF?KF?KF?KF?w}mmdqumZODildsul_UIuzrqpfKF?mi_KF?KF?rume\QcWKidYuzrje[dZOlldh`Uf\PKF?KF?KF?KF?g]RKF?KF?KF?KF?KF?KF?KF?KF?KF?KF?g^S
f^Se[P||aWK^XMqqhcYNKF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?KF?LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@cZNpskLG@LG@LG@ih`kg]qrih`ULG@e\QLG@LG@fjcLG@LG@|LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@|śLG@ܚîLG@[VMlqj{wǦزLG@LG@LG@LG@LG@LG@~g`VZODf\QhbWLG@lg\LG@\YP{je[cXLf]QpqhsvndYMe_Tnj`f[Ouwn\RG_VJ\RGUK@cbZLG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@~mlccYMlme||uwnhdZLG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@LG@MGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAd[Pomdnkalf[MGAjf\MGAMGAMGAMGAMGAMGA{}MGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAᐵ_ZQs|vmƧMGAMGAMGAMGAMGAMGAMGAMGAMGA`UJkmeYNCvxpbXMiupmj`[SIYRIMG>rsjf\QdYMkf\v|thcYrqhiaUmh]႔`WLaYN^VKWMCsriounMGAMGAMGAMGAMGAMGAMGAMGAMGApndstkd[OMGAMGAaXMrsijcXtvngaWMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMGAMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBg]Qi`UMHBMHBMHBMHBdYMrsjke[f\PMHBnlbMHBMHBMHBMHBMHBMHB|ՎMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBllcMHBuײ`[Rw|zy¥ܶMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBd]Sc_Vw|t`VJqripneZRHpnd[TJZRHUMC|kg\cXLg_Tsvmprie\QjbWg^RMHBMHBxz_UI_TI_XN_UJlmdMHBMHBMHBMHBMHBMHBMHBjcYicXe[Ozwvyqgd[jcXli_u{sMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBM
HBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBMHBNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICttjNICvwnaVKNICNICNICf]Qtvmh`TtvmNICx}uNICNICNICNICNICػNICNIC~ЖȿNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNIC~ˎnwrȰ^YOyj~|fjc׫ꀓNICNICNICNICNICNICNICNICNICNICNICjf\}g^RgaVy~ujcXNF=oytrsjdZNdZOnlcw}uhaVc[Pg]Qh_SNICNICNICNICc\R^VK_VKfaWNICNICNICNICNICNICNICuxopridZNolbvypNICbYNqpgmnekh_NICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICNICOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDjg]OJDOJDOJDc\QibWuxof]QOJDe\P~OJDOJDOJDOJDOJDOJDOJDȟ̰OJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDy~~ziia{gwtѧ|OJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJD~`UIhjbstkcYMjcXx}uwzr\YP[XPVTLy~wg^ScXLjdZv{smkbjkbrqgttky~vXMB`ZPOJDOJDOJDjjaa[Qx|OJDszsd^T]RFOJDOJDOJDOJDOJDolcuwne\QOJDOJDmlcx}ug^RyxOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDOJDPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEh^RjbW{}PKEc`Wwzlg]tvmg^RPKEdZNPKEPKEPKEPKEPKEPKEPKEĢӭPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEfg`PKEw|ϭzwმɥܝگPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKERMDxpqhu}vomcaWKtxow{r\ZRrqgbbZcc[`aZkf\cYMg_Trtkw{sf^Ssumw{sjeZwypXNCZOD_XNtulqoey~vd^Tw~w`WL_UJPKEPKEPKEPKEg^ShbWdZOolbPKEkkce[Pd[PPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPK
EPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEPKEQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFlf[QLFecZQLFomcssjh`UQLFh`UQLFQLFQLFQLFQLFQLFQLFئQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFsyrsyslqkװ͍QLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLF_]TQLF~`UIy{uxokg]stkp}rqgvoofdZOdZOnlbw}vhcX`XNh`Tg^S^VKZODx|siaVf`UQLFwxQLFaWL\VLQLFQLFQLFQLFuvmqqicZNstkf\QQLFaWLywy}uQLFcZOQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFQLFRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGnlcRMGx}RMGe[PrrhpofjbVRMGjeZRMGRMGRMGRMGRMGRMGRMGnuo٪μRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGw|ώszsRMGxޏХ܃RMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGcf`RLDUPHWVNki`yf\Qf_TÍpmcXWOQKBRLBURJw~vf_SdYMje[v{sonev{uxoe[Pvxoa[Rmh^okaolbglehcYwyRMGbZOrxpRMGRMGRMGnkae\Px~vh`URMGolbkeZyxRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGRMGSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHe\QVSJΥf\Quwnmi^g]QSNHrriSNHSNHSNHSNHSNHSNHSNHs{ֱSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHkldSNHs|ǡ{SNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNH_e_tJE>ZSIXOEVND|b[QssjdZN}}~iqknzusjdZcYMh`Usvmv{sqofdZOtzsxquwn[SIWLAZQGrtlidZxxZQGSNHbYNkngSNHSNH|}g`Ud[PstkSNHqpff\QcZNSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHSNHTOITOITOITOITOITOITOITOITOITOITOITOITOITOI
TOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOIprilg]TOIe]Ri`UwzqkdYni^TOIuxoTOITOITOITOITOITOITOITOIwϲTOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOIt|v̛̆TOIvƔϪlqkTOITOITOITOITOITOITOIb[Qkkbptmsyrt{usyrptlkkcd`V^UJXNCYVNaWLrrhcXMsvnolbiuqnka|{mkbdZOf]Qppgx~wgaWe]Rmj`ed\v{hdZ_WL_XNzx^UIZPFidYe^SikcTOIaXMfcYTOITOI}~mkad[O|{TOIcZOkf[u{sd\QTOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOITOIUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJdZOw|tUPJUPJv}ujdYvzqh`Tx{rjf\{{UPJUPJUPJUPJUPJUPJUPJUPJyp|wƤUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJЭUPJing}ǞπUPJUPJUPJUPJUPJih_vyt~xgh`xzlj`nk`TJ@y}uUPJUPJUPJUPJswnf]Qd[Onkax~wcYM{|~jdYy~s|vlnf{z^SG`VKppg`UI{UPJe_T^RGUPJvyprtkd[Pmi_rqgUPJpnet{UPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUPJUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKiaVUQKUQKUQKmh^vyqh`U~~UQKd[OUQKUQKUQKUQKUQKUQKШܦUQKUQKUQKmlѪդUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKlogvyUQKUQKs{lrkũ䈧UQKUQKUQKkmeyȢӨ߱XXQjvq}UQKUQKUQK]TJw|thaWywUQKUQKUQKUQK{~haVdZNlh]zzrulUQKg^Rg`TUQKzh_TmnfibVb[Q_THaXLx~ii`|szspndtulf^Skf\UQKUQKtulvyqppgUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKUQKVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVR
LVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLg^RVRLioiVRLpndvzqibVVRLe\PVRLVRLVRLVRLVRLVRLݩޱVRLVRLdaXYNDşճu~VRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRL~VRL{VRLVRLvʛ˗VRL_THcYNZOD]RGb[QպVRLVRLbb[u܅VRL^[RZPDWMBNE;`WLf[Oe\QZRHVRLVRLVRLkg\dZNjdYvyqv}uw{rf]QVRLkdXvysrikmed\R_THc[Ponee\PprjiiakmdgdZkja||pneibWg^RVRL_VKzxjbWe\Pv|tVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLVRLWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMqzsf\QWSMWSMg^RsritulkcXiaVWSMWSMWSMWSMWSMWSMWSMWSMgd[aVJͲᎱWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMҤWSMWSMozuszrtl|cYN]SH`VKbYO]^VuZUKZQGZQFZPEYNCXNBWLBTJ?ptktvmcYN}zxbYNTKAWSMWSMv{re[Og_SqqhhdZrrhf\PWSMWSMqpf\QFr|vmnfc[P_THjh^bWK|lnfbZOa]Svyqmkaf]QWSMu{svxpkh_WSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMWSMXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTN^THh`Udf^e\Pg^RuwnsrijbWnkaXTNXTNXTNXTNXTNXTNߴXTN]SGhe[`UJŸXTN٢XTNXTNb\RnslnqiXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNrzszׯXTNXTNmzutxpǠɢΣХԧاףϛnmdgcYc\Q`WKUKA[RG^VK_XNaZP`ZP`ZQ]XN}qpfcYMtxpombeaW_WLUK@UMCsumf]Qf]Qomdlj`tvlf]QqqhXTNXTNXTN|}sri{zy}jh_bYN`UIXTNnrjii`w{rrsjf]QXTNXTNzwv}uwXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNXTNYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOY
UOYUO\QFolbYUOYUOjf\i`UolcYUOYUOYUOYUOYUOYUO̽YUO_SGfaWcZNБ¤YUOħYUOdsqd\RqvonogYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOw{YUO|YUOYUOYUOd]RǦس󂟞YUOYUOYUOYUOYUONNHՓppgbZOgaWaWLjngotmounmrklqjcf_x|tcZOonewzrx{jcX^UJfdZ^VKVMBx~vg_Te[Pmi_yyqsjonef^Snj`YUOYUOYUOYUOwyq|}pzuvxhcY`VJbYMvzrge\b[Pmi_jf\f]Rkg\YUOYUOYUOokasvnYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOYUOZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPf]RzwXNCZVPZVPjbV׼tulZVPZVPZVPZVPZVPŗȟZVPaWKc\Qg`UꉯZVP̫ZVP~e]SrwopqiZVPo|ZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVP}ZVPš®ZVPZVPophȪicYvzroofg`UbYM^VKc`Vhh`oslr{t㎧f_Ue^TgbXfaWz^VLkg]omcklckja`XNfcZa\RicXe[Pkf\w}u~jdY{zZVPZVPZVPjcXzxw|qund]S`UJjg^ondZRHw~wlnf\UKqqgokahaVg_SZVPZVPzwxxvypppg{~ZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVPZVP[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ_TIwzq͉[VQ[VQ{y[VQ[VQ[VQ[VQ[VQĘ˕ő[VQd\PaVKlh^n[VQԳ[VQe^TswopriWMCv{[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQqystztǛ[VQ[VQu{sw|tЧ՞ĉljaaWLͥw~waYNkja_WLmog|}icXw{sb]Sx{slj`c_Uc_Vkg\f\PjdYuyqyypum||haV[VQ[VQwzrh_Tif\mmdzkjabYNaVJtxpbWK[VQijbmh]je[g_T[VQtzsy~viaUlh^f^S[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ[VQ\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR^SGވ\WR\WR_XMhaV|{\WR\WR\WR\WR\WRϝɡׄUKA\YQg`V_THqriAUV\WRktotyg`UrvnrulZPEwz\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WRx̒\WRwx}|غ\WR\WR\WR\SIb[Qj
jas{t|РȬ╴txq`VK\WR\WRaZOknfxg]Qg_T~g]Re`Vgf][UKnkbf]RicXzzhdZiaVg_T]VL\WRpmci`Uh_S\WRaVJzw~xf`U`UJe^Skg]r~ynkbf^Rw}ujbW\WR\WRuwn|~\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR\WR]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS]XS„qpf]XS]XSlnfئyg^R]XS]XS]XS]XS]XS]XSݙïvZOD^[RjeZ`THywumsl^UKmlc򃚘hdZyxdd[YODw|tշǛpqhכ`XMw}_VKhf\\TIuymog^SGWND^SGkmfji`_UJjja]WNaXMjkcysulhbW~훚b^UYSIrrinj`ttkdcZnqiu{t{mmcki_rwoc`Xolbuwnsqhy}tig]pmcstjkkbhg^ÜYOCZXPt{[QGkh^lldif\\TJz{ݺf_T`]Usvn䜛ZSIeaWouo_UJ_UJVRJu}wkogy͜_UJii`he\VNDfdZPG=cZPw{_THnphebX\TJ`VKs{tw}vkf\je[daW]WLSMD{yqndy}uge\syr|uzski`ig]one{ff^uyqsrhmj`x}upmcpmcv}vqpfϜkjafe]Ɲ[RG}YODլ]\Tie\ͽ~aZPpqhqriv{sgqm`ZPdf_^VK^UJhias{ac\^UJpxrc^TWPF_YPyd]Rz~e]Rx{he\aZPff]`VKaZPpofmh^faWrtkfcY`YO]VLx{rok`baXszs~oofki_ff^ccZy~uqpg~||lkbmjaqumkjaОimfUJ@đfbXq}xz{lka^VLe`Vsulyyehb`XNkkcxge\ebY^UJ|etq`[RvaaYZ\UaVJtxpbWKqtly|hdZe_Umnf{`VK`WL`]Ttwo~faV~鞝gdZb[P_XNXTK|zttjpmbknf||nmckh_ppfwyfe]x|s{zstkŦhe[vypone~{mlcҟsrirxpX]WǟoofڵWOE_b\u}wvyqbbZZTKa\Rlogad^TLBw~_VK`YOjldc`W]VLa\Sdd[fibjjb`UIki_}ZQFbXMppgwxhdZjg]syqaWKig]mqjfjcsuljf[ebYb[PaZP_ZQ_YOx{rpndttk{qrjljajh_poetwn]\Tff]x~vpmcw|tqoeqoesxqpmcssjnofڠ{x|pvoLF=\b]jg^ӺĠsvnSLBXNDmka{{󃑋_[R]WM[YQc`Wa]T_YNc_Vfd[fcZd`Vc_Uu|ubXMc[Prvn~ˁcXLv{s~noflj`prj|ҠbXL⠟w{ed[svmkf[b]SqskàfcYaZPa[Pa]Sjjb_`Xttjokazwnndlj`摰jkcb`Xqrjrrhone}pndkj`kj`oqhޡsri{y|fmgelgnzux~vtul[UKWND\RGjf\\UKPG>`]Tcc\eibƕo|m}yaWKsvnswooofrum{bXMߡfaWecZoofqpfe`U~c_Vd]Sb[Qc^U``X`YOy~vqndpmcrulmlbki_qqhpof~gmhhh_gg_qoeqriie\wyqmlbݡoqi㢠srh{y^ieKD;VMCcg`TMC|vyqSKA[QFkphf_Trtlˢ[RGTLBQLDb^UjnghmfmvpMD;RJ@TNEbhchdZcZNrul~v{tswoxxdlgUOFmuo]YPNIAcZO䢠cZOii`yw|tie[gmfa\SaZPf`Ud`Wee]c]Swtvmokassinnelkaig^gg^ec[yyqoex|torjpndjh_Հpsk裡rqgvxo>7/~QI?jvqabZUQIyx{{}}UOFa]S{he[˲𣡡txo֣\SHVPGYRH{{z}}PH?uecZ^YPjjae]R\YPf`UszsrzsOMFqpgrul^Y
O_XNa[QgcZhiaiiaknfqpfpmbwzrϣrummlbki`oofgg^logqsj|{omc}jh^rpgji_~ȷptl룢ssijuquvl~}zJB9ZTKuvmH?6Z\UWOE`lh^rqlkb}zyC;2psk٣rsjuvm{~棢c_V[RG~ԣxZQG`\Smzu~eaWgaV\[S]XNjg^orj]\Tqqhkf[d^TVQH^WNfd[ikcqwod^T|}olbqoe{znneljaljaff]ff]hkcnmdstkig]t{tpndrskyptlvxo\^V|ނRRKkmettkUKAYPEZPE[SH_ZPqsk{{txpжuxpۀ_[R\RGv|hjbWPGYQFcc[zva]T]UKZSITLBfcYkh^sp{vnofuzrnph^`Ydg_nmcrsjhe[[VLmpiluod^Tuwonkassjqtllkbkjaljaqqhgf]vxyyssjw{s|qpftulqxrόqvoŽ񥤤uvmTOF~sB:2p{ttkxzrXQHSH>v}v}Z\UMF>zyʍ↙lslx~punounXODjkc`WLc]Sqwo[TI[XPoofqrjMG?nnepsk}v|tswo_\Sr|wfcZx|p{rqhokavyqʥnnemlblkbsulgg^knfkldqrjpndnmc}jh^z~qoepskkohߘqwo򦥥wzqNG>vxokuomqjstjy}tlIB9|ctrVMB[TJǐʦnqjjkbZQF\SIUMCd_Ub[PxzvgbXgaVw|ujf]rwoNIAfkdv{snlbւ\ZRpmbiiakkc}}omcpndzxqsjmmcmlclkbrulde]hianxrnmcrqhsumjg]run||yyәpunuul򧦦y}uJE=TWQ[VMQI@ssjtnneo{\ZR_[R֧ŝpwp٧fcZXPFdaXdcZYPEounb[Qb[Pjh_zc]Rg_TħuieZvx~`lha_VZULnmdlh]hf]fib{ye`Wzy~vssjrrixznnemlcmlcjjaqrigf^u{lnew|tvyqw}uy{qofrwo~w}uʛpvo푩򨧧zy:71KG@݅\^WstkkwrQKB\`Zllcrulhrm_\R㨧̗𨧧aZP`YO~[RGZTKjjbaZPaZPge\rztgbXmogli_XRI[ULieZy}}b`X`[Q^XNWRIoofx~vt{sozupmcb]S}v~ttkpmcuwnǨpqhnmdllcmlcъrskdd\qxrwznrkqqhqoeǀߐjh^u{tqpflkb˨鄕zz˕ntm⑨󩨨||gyw}QKBstk`hcPKBtzsw|u_`Xa_VsvnҔ͊Ʃ]ZRy\UKs{uULB]UK[SIKDr|_gbƫᙼt{cc[_\S_^UglevSKBXULipjZTJXSKVXQllcli_~b`XQTN?:3qqgХ֐ttkqoeqqgrtlnlbtwnnjb`X__Worjnnfnnfnognqjuzrxysyr~nmcqoeӀoqhnofqqg¿ľcokJG?r̍ZppekeZQGgjc|D<4NNGpumߛéްu~xlpic_Vee\fg_hjcjngkohkngmnfhcYoof’vvdkeWVN]^WMIB_a[ooeli^qpfrpgqof]\Tonepoeyw][S≚oskmnfmnenogmphttju|tv}uu~}uxplkbqofz|qultvnu鏪cspJE>P_]GA:XZSxiVYSfjb[TJ^YPa`Y_\Sjkcѡӡбqzsszshf]gcYgdZkjatyrώfkeSPHLG?jngcbZnphomd}OVRhtpqpgsrhhvry}ttyqomdstkȎsrilnemnfnognqi~pskv}vpzt{u|upofssjƨoqhrri{z⓰π\nmylTOFq}ryr\`Zrltncc[dcZmrkəʡԲ_VJ`YOy^WLӲNKCXSJ]YPa_Vff]lrklrkjmefe\daXbaXfcZstkKIB`e_eojomdgjbppgomcvzrÈ]`Yrqgy|tijblnenpgnqilqjrriznphv{snmcrqgxyqtl|~~~FVVvSMEZ\U[pp~ăH@7^b\}w=>:`VJ[QG{b]SdaXWND_^Vgnicd\UOFr|vdaWb^Td`WecZgf^hi`ijbjkdjkcijbgg^fd[fcZgf^pxqqqhnkabhb^\T[XOrqg\[Trpgpofpof|}Ɠrqgkldmogorkvyp|mogppg~qpfophssjt}vz贳z~B>7􊠜[^WSOFfg_JB:`e_kyuHF@aXMef^b\Rd`V]UKYPF^YOee]wqxqhg
_fbYeaXeaXebYfbYfcZgd[hf]lnfx^onmnfmkax}vѴ{{rpg_b\ttjprjomcuwoʍtvmmogoslounuulzzt~xmofw}vnmcqqgŽrrhuvmpskw|t۴ٶ~vwn|}JKEchaxdzzMEflffqmdf_xe^Td_UϵgcYqvnmrj]YPge\nwqWYRrsknkbXUM__Xkvrrqhstkmkbsum͍͵𞿾s}wrrhkmew}vmlcqqgtulorijjarqh͵敷uwntum42-z|@<6䇜PG>lwr|KD;wPH?fmg|XQG_]Tuge[{y~aXNcaXWPFjmemmdmkbv{s\e`OLDWTLrqgknfsriuvmmmenndx~wtulrrhssiv\a\tvm~lpippglkbrqhuvmmnerrizߔw}uYx|xy}∞jxt|QI@sRJAhoiWMC^ZQmvpe_Ug`VƷgcYc\QsyrnqifcZrsk[ZR]ZQjsnqqhrqgff]lkbrsk~͒gibrrisrilnfpxruvmɗv|tnmcqqgssjnqhijassjǷlrlsy~vef^OXUv}uґ~UMCUMCTLBksmQJAipj^ZQoys_]UbYN}aXNfbXbZOc[PpwqaXNabZu}vrtknlbraaX_]Uoofgh`rqhnmcw|uƋfhailex}tyrrh͸ppgmlcrqhrrilmdtvm~ϥvyqgkdsxpPZW~وsXQHUMDcc[PJAksmWQHaXMmpiaZOйb[Pozumogkjamka|~lsmrqhee\rrhrriji`qrj~̓lrkqpgsrivwnhoitvm~uzrnmdrqhy~vmnf{zvyqƜ}ՆZVMϺVOE[VLNI@jrmPG>ophc]S_WLĺe_Uhd[~bZPfdZqrj׺stkrqhcaY}~mlcu{sʌp{uurriqztx}uonemmdqqhtvmllduxo~ԖϬ~~xzq{~хXRIVOFXPFID<6MSNclgnrjpqhp{uRb`QRKsvmvzrgh_sulΡՖtvmwzrRQJlme|~lsm|~qsjsxqu}vyzef^imf{~̐+,)ŔñXuw}~~,,(de]t|_aZͪwʿji`jh_kjblmepvocidqiuqo{wejcmmeu{tpqivX`\exv;94x}ukmew{s}ɔUSL[ZR^\TabZgkdjupv{s||y~vu~wtvns|rumrvnszsx~vce]mslv|u͜{}Ƥρf×`~}~}773imft|hkdΩxȾo}ZXPkjalofkmeloglpilrklrkkrkgojpxrjohde^vyqxqprjRTN]c]^faT[WsvmuypikcsulБgzyVWPXXQ`gak{xrtktul^_Xz{~|}{svnqtlsxqtwor~x~rvnΟy{苻΁ŋVqsߩ;=8muot}kohΤyƽig^\[Tujjamogotmpwppxqpvonrkjmeff^a`X_c\mogqtlsvnJJEk{ySRKMOIekeuyq`aZr{tywLXVs|mtn``Yz|}jwtzxlne~swoprjt|uqsjw~v|uφu~dyywɋ:MN~ܦ;=:oyts|mrk}ͦyƼz]]Ucf_ih`ii`jjbiiagg^ed[dd\r{nphvpriUa^MPJ>FDkpjswox~vpxrtwo^c]cgadf_dd\ed\hjbjpjxwy~vtumlmdsvmswpprivyrtkyxu{sx{dg`˒G_bȋo}}}ث696p}xq~zpwq~Ξ{ûji`chba`Y_^V`c\agbaidbkfcnjdrorxpt}wqskJIBoyskphtxptvnktn}{|jwso|xs}y~rtkx}vixuhiaxzbkfWYSrwoprjv~xrtk}|ѢڍFJEъ*5510,Xooow~v|ҩ),)p~zn{wqztz|Μ}¹imfoslfmgaaYbbZdg`gmfismhsnpysryrqtksxpfvtnxsmsmjmfde]w~w}yyTYSHIC~~}sw}v|}HJD|~ryrcusX\Vqumpsjw|qskҟ|׎Zd`ZqrZb^z{̧o~zjwrr}wosl͠ǁjkbqxqsaaYaaZac\_c]r{uqwpsxqqtlgnhjpjjngikcgh`swoxxsvmp{_aZWXQy{zzsumqwpxxq|orjqunx~qsjv~ňӄisnn}ws|y~£-.+l|yfqmszguq˜!$"\_Ykldmphn[]Vpwqqvorxrsyrrvobe^gibhibhiaikduzruyqIOK{}wtxpsvmYa\pv
prulpxrmogv~xuzrbd]nskԠ۞ƚ_fau[`Zsztwz|fvs_ieukzwǜ:@=mrjnqinqioslptlorjr}wptlsxq_`Xef^giailemtnivrtyr~OOI{}eokfmho}yrtkw}v|rumlqimofx~t|ueg_psjգՅk}{vՄt{ٖ&&S\Xvm|ELItv{ikcgiagjcnrjqtlt{thpjksmnwqt||zxy{~uyqeg`jqkr{m{wkxtn~uyqv|u~ef_u}vikcrxrori~ʜu~Į_uui}{nwqÑNcc to|fg`PPJxxw}h|zUWQUYS`qov~wyyrumabZcd\eg_fibgleejdxxy}pytyyvxjmfu|nqj~bjdsyrףڙwțׅbg`^z|DQPn~q񖶴~gh`UXR}\`Zfoi^aZ\_Yryrv}vv|urwonyscd\bd\]_Xxyy{~szxxt{unysknfvzpum؏ǢҜԧ`srnؑꔳp~zikcr}wpTUNdniysxpwyz}g}}qtlW]Xtxqy|t}yxptlgibluoyَު}ĩtٓבxknflogmqioskwypskQRLu{thywzrtlrwov~ptmnyt|o{vy673{ڒʎqwpotlsyrfmgnrjaf`bd]eibfjcdibTYTu~wy{qtlzۆk~|m=C?یʃۆ\b]jtor|vwwuyrxqrumwyy{rvnnsl܁n)+(ӝø_gc]{~r~yhjbildkogmqjnqiysyrv~wnrjɑݘc{|惨݅{zkohx|ދNaaߔRelease_v0.3/kernels/compiler_load_bool_imm.cl000066400000000000000000000006601223142177000216560ustar00rootroot00000000000000__kernel void compiler_load_bool_imm(__global int *dst, __local int *localBuffer, int copiesPerWorkItem ) { int i; for(i=0; i 5) dst[i] = src1[i] - src2[i]; } Release_v0.3/kernels/compiler_long_2.cl000066400000000000000000000007231223142177000202420ustar00rootroot00000000000000kernel void compiler_long_2(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); switch(i) { case 0: dst[i] = 0xFEDCBA9876543210UL; break; case 1: dst[i] = src1[i] & src2[i]; break; case 2: dst[i] = src1[i] | src2[i]; break; case 3: dst[i] = src1[i] ^ src2[i]; break; case 4: dst[i] = src1[i] ? 
0x1122334455667788L : 0x8877665544332211UL; break; } } Release_v0.3/kernels/compiler_long_asr.cl000066400000000000000000000002501223142177000206610ustar00rootroot00000000000000kernel void compiler_long_asr(global long *src, global long *dst) { int i = get_global_id(0); if(i > 7) dst[i] = src[i] >> i; else dst[i] = src[i] + 1; } Release_v0.3/kernels/compiler_long_cmp.cl000066400000000000000000000017161223142177000206630ustar00rootroot00000000000000kernel void compiler_long_cmp_l(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); dst[i] = (src1[i] < src2[i]) ? 3 : 4; } kernel void compiler_long_cmp_le(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); dst[i] = (src1[i] <= src2[i]) ? 3 : 4; } kernel void compiler_long_cmp_g(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); dst[i] = (src1[i] > src2[i]) ? 3 : 4; } kernel void compiler_long_cmp_ge(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); dst[i] = (src1[i] >= src2[i]) ? 3 : 4; } kernel void compiler_long_cmp_eq(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); dst[i] = (src1[i] == src2[i]) ? 3 : 4; } kernel void compiler_long_cmp_neq(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); dst[i] = (src1[i] != src2[i]) ? 
3 : 4; } Release_v0.3/kernels/compiler_long_convert.cl000066400000000000000000000011611223142177000215560ustar00rootroot00000000000000#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void compiler_long_convert(global char *src1, global short *src2, global int *src3, global long *dst1, global long *dst2, global long *dst3) { int i = get_global_id(0); dst1[i] = src1[i]; dst2[i] = src2[i]; dst3[i] = src3[i]; } kernel void compiler_long_convert_2(global char *dst1, global short *dst2, global int *dst3, global long *src) { int i = get_global_id(0); dst1[i] = src[i]; dst2[i] = src[i]; dst3[i] = src[i]; } kernel void compiler_long_convert_to_float(global float *dst, global long *src) { int i = get_global_id(0); dst[i] = src[i]; } Release_v0.3/kernels/compiler_long_mult.cl000066400000000000000000000003121223142177000210540ustar00rootroot00000000000000kernel void compiler_long_mult(global long *src1, global long *src2, global long *dst) { int i = get_global_id(0); if(i < 3) dst[i] = src1[i] + src2[i]; else dst[i] = src1[i] * src2[i]; } Release_v0.3/kernels/compiler_long_shl.cl000066400000000000000000000002501223142177000206620ustar00rootroot00000000000000kernel void compiler_long_shl(global long *src, global long *dst) { int i = get_global_id(0); if(i > 7) dst[i] = src[i] << i; else dst[i] = src[i] + 1; } Release_v0.3/kernels/compiler_long_shr.cl000066400000000000000000000002521223142177000206720ustar00rootroot00000000000000kernel void compiler_long_shr(global ulong *src, global ulong *dst) { int i = get_global_id(0); if(i > 7) dst[i] = src[i] >> i; else dst[i] = src[i] + 1; } Release_v0.3/kernels/compiler_lower_return0.cl000066400000000000000000000002631223142177000216700ustar00rootroot00000000000000__kernel void compiler_lower_return0(__global int *src, __global int *dst) { const int id = get_global_id(0); dst[id] = id; if (src[id] > 0) return; dst[id] = src[id]; } 
Release_v0.3/kernels/compiler_lower_return1.cl000066400000000000000000000003221223142177000216650ustar00rootroot00000000000000__kernel void compiler_lower_return1(__global int *src, __global int *dst) { const int id = get_global_id(0); dst[id] = id; if (id < 11 && (src[id] > 0 || src[id+16] < 2)) return; dst[id] = src[id]; } Release_v0.3/kernels/compiler_lower_return2.cl000066400000000000000000000003421223142177000216700ustar00rootroot00000000000000__kernel void compiler_lower_return2(__global int *src, __global int *dst) { const int id = get_global_id(0); dst[id] = id; while (dst[id] > src[id]) { if (dst[id] > 10) return; dst[id]--; } dst[id] += 2; } Release_v0.3/kernels/compiler_mad24.cl000066400000000000000000000002571223142177000177730ustar00rootroot00000000000000kernel void compiler_mad24(global int *src1, global int *src2, global int *src3, global int *dst) { int i = get_global_id(0); dst[i] = mad24(src1[i], src2[i], src3[i]); } Release_v0.3/kernels/compiler_mad_hi.cl000066400000000000000000000002611223142177000203000ustar00rootroot00000000000000kernel void compiler_mad_hi(global int *src1, global int *src2, global int *src3, global int *dst) { int i = get_global_id(0); dst[i] = mad_hi(src1[i], src2[i], src3[i]); } Release_v0.3/kernels/compiler_mandelbrot.cl000066400000000000000000000025451223142177000212150ustar00rootroot00000000000000// Used to ID into the 1D array, so that we can use // it effectively as a 2D array inline int ID(int x, int y, int width) { return 4*width*y + x*4; } inline float mapX(float x) { return x*3.25f - 2.f; } inline float mapY(float y) { return y*2.5f - 1.25f; } __kernel void compiler_mandelbrot(__global char *out) { int x_dim = get_global_id(0); int y_dim = get_global_id(1); int width = get_global_size(0); int height = get_global_size(1); int idx = ID(x_dim, y_dim, width); float x_origin = mapX((float) x_dim / (float) width); float y_origin = mapY((float) y_dim / (float) height); // The Escape time algorithm, it follows the 
pseduocode from Wikipedia // _very_ closely float x = 0.0f; float y = 0.0f; int iteration = 0; // This can be changed, to be more or less precise int max_iteration = 256; while(x*x + y*y <= 4 && iteration < max_iteration) { float xtemp = x*x - y*y + x_origin; y = 2*x*y + y_origin; x = xtemp; iteration++; } if(iteration == max_iteration) { // This coordinate did not escape, so it is in the Mandelbrot set out[idx] = 0; out[idx + 1] = 0; out[idx + 2] = 0; out[idx + 3] = 255; } else { // This coordinate did escape, so color based on quickly it escaped out[idx] = iteration; out[idx + 1] = iteration; out[idx + 2] = iteration; out[idx + 3] = 255; } } Release_v0.3/kernels/compiler_mandelbrot_alternate.cl000066400000000000000000000023541223142177000232520ustar00rootroot00000000000000inline int offset(int x, int y, int width) { return width*y + x; } inline float mapX(float x) {return x*3.25f - 2.f;} inline float mapY(float y) {return y*2.5f - 1.25f;} __kernel void compiler_mandelbrot_alternate(__global uint *out, float rcpWidth, float rcpHeight, float criterium) { int xDim = get_global_id(0); int yDim = get_global_id(1); int width = get_global_size(0); int height = get_global_size(1); int idx = offset(xDim, yDim, width); float xOrigin = mapX((float) xDim * rcpWidth); float yOrigin = mapY((float) yDim * rcpHeight); float x = 0.0f; float y = 0.0f; float iteration = 256.f; bool breakCond = true; while (breakCond) { const float xtemp = mad(-y,y,mad(x,x,xOrigin)); y = mad(2.f*x, y, yOrigin); x = xtemp; iteration -= 1.f; breakCond = -mad(y,y,mad(x,x, -criterium)) * iteration > 0.f; } const uint iIteration = 256 - (uint) iteration; const uint isBlack = (iIteration == 256); const uint black = 255 << 24; const uint nonBlack = iIteration | (iIteration << 8) | (iIteration << 16) | (255 << 24); out[idx] = select(nonBlack, black, isBlack); } Release_v0.3/kernels/compiler_mandelbrot_alternate_ref.bmp000066400000000000000000006000661223142177000242720ustar00rootroot00000000000000BM66(      
            %%%                  &&&            ===  $$$???NNN  ///###  QQQJJJ///   !!!   &&&  000   888===***fff   ,,,$$$]]]000###   :::111  $$$555111  111```MMMccc  AAA888BBB(((  000  DDD^^^  ///,,,  JJJ&&&%%%  UUU777---  xxx111   ttt  EEE!!!    ###111      +++      ...     %%%    BBB    111... :::###  """>>>iii... %%%    <<< ***+++???\\\&&&))) ,,,rrrLLLcccddd'''vvv 444AAA,,,    555  ###///cccXXX222ttt,,,EEELLLaaaWWW999 000     +++;;;000555444     BBB???***###|||:::888 dddSSS܂LLLEEE ---  !!!ooofff))) ]]]MMMfff(((  111  RRR,,,AAA...***  %%% LLL222NNNbbb+++UUU 999...  '''333$$$###JJJ ###SSS111!!!***%%%  rrrlll...%%%***nnnGGGCCCCCC666  (((333TTTttt```FFFLLLTTT  +++666;;;```999  ࣣ[[[+++  eeeIII  (((***  !!!'''HHH  ***,,,   JJJhhh   %%%  ''' )))(((LLL  III"""(((  VVV111\\\;;;BBBPPP  !!!BBB555ZZZ222+++PPP888   &&&///EEE   ///OOOfff   VVV666ddd    222KKK    ccc%%%      FFF    GGG        555999***...mmm%%%       111{{{TTTrrr}}}&&&      ooo...::: !!!  '''   888vvv111===      ,,,---===@@@       %%%  jjj mmm  ```<<>>DDD;;;!!!mmmOOO    vvv   %%%eee )))###  111'''  111"""RRROOO!!!...111   BBB;;;iii---mmm444   ---!!!&&&NNN   000kkkmmm,,,     QQQ...999  """     ???OOO%%%     ###      ###  """     ???OOO%%%     QQQ...999   000kkkmmm,,,   ---!!!&&&NNN   BBB;;;iii---mmm444  111"""RRROOO!!!...111  111'''  )))###   %%%eee   vvv    >>>DDD;;;!!!mmmOOO  <<<LLL!!!   ###JJJ  RRRdddvvv  !!!MMM(((eee000 ~~~555RRR|||  %%%;;;+++!!!888:::   BBB---jjj   !!!000%%%   ===JJJJJJ&&&###  """hhh'''@@@ 000lll,,,===  MMMNNN222))) 111???...  aaaBBB+++666uuu666 EEEYYY(((000  HHHkkk%%%---555]]]... OOO  ???}}},,,///[[[ 666 ppp  +++..."""xxx111ggg &&&&&&  HHH'''"""$$$"""  <<<NNN777  YYY$$$'''777SSS VVV  !!!AAA 555 JJJ###   $$$  888(((  jjj mmm  ```<<>>iii... %%%    %%%    BBB       ...        +++     ###111   EEE!!!   ttt  xxx111  UUU777---  JJJ&&&%%%  ///,,,  DDD^^^  000  AAA888BBB(((  111```MMMccc  $$$555111   :::111  ,,,$$$]]]000###  888===***fff   000    &&&   !!!  
QQQJJJ///  ///###  $$$???NNN  ===            &&&                 %%%                   Release_v0.3/kernels/compiler_mandelbrot_ref.bmp000066400000000000000000006000661223142177000222330ustar00rootroot00000000000000BM66(                    %%%                  &&&            ===  $$$???OOO  ///###  QQQJJJ///   !!!   &&&  000   888===***fff   ,,,$$$]]]000###   :::111  $$$555111  111```MMMccc  AAA888BBB(((  000  DDD^^^  ///,,,  JJJ&&&%%%  TTT777---  xxx111   ttt  EEE!!!    ###111      +++      ...     %%%    BBB    111... :::###  """>>>iii... %%%    <<< ***+++???\\\&&&))) ,,,rrrLLLPPPddd'''vvv 444AAA,,,    555  ###///cccXXX222ttt,,,EEELLLaaaXXX999 000     +++;;;000555444     BBB???***###:::888 dddSSSقLLLEEE ---  !!!ooofff))) ]]]MMMfff(((  111  ZZZ,,,AAA...***  %%% LLL222NNNbbb+++UUU 999...  '''333$$$###JJJ ###SSS111!!!***%%%  ssslll...%%%***nnnGGGCCCCCC666  (((333TTTttt```FFFLLLTTT  +++666;;;```999  ࣣ[[[+++  cccIII  (((***  !!!'''HHH  ***,,,   JJJhhh   %%%  ''' )))(((LLL  III"""(((  VVV222;;;BBBPPP  !!!BBB555ZZZ222+++PPP999   &&&///EEE   ///OOOfff   VVV666ddd    222KKK    ccc%%%      FFF    GGG        555999***...mmm%%%       111zzzTTTqqq}}}&&&      ooo...::: !!!  '''   888vvv111===      ,,,xxx---===@@@       %%%  \\\ www  kkk<<>>DDD;;;!!!mmmOOO    vvv   %%%eee )))###  111'''  111"""RRROOO!!!...222   BBB;;;\\\---mmm444   ---!!!&&&NNN   000jjjmmm,,,     QQQ...999  """     ???OOO%%%     ###      ###  """     ???OOO%%%     QQQ...999   000jjjmmm,,,   ---!!!&&&NNN   BBB;;;\\\---mmm444  111"""RRROOO!!!...222  111'''  )))###   %%%eee   vvv    >>>DDD;;;!!!mmmOOO  <<<LLL!!!   ###JJJ  RRReeevvv  !!!NNN(((hhh000 ~~~555RRR|||  %%%;;;+++!!!888:::   BBB---jjj   !!!000%%%   ===JJJJJJ&&&###  """hhh'''@@@ 000lll,,,===  MMMNNN222))) 111???...  aaaBBB+++666sss666 EEEYYY(((DDD  HHHkkk%%%---555]]]... OOO  ???}}},,,///[[[ 666 ppp  +++..."""xxx111<<< &&&&&&  HHH'''"""$$$"""  <<<NNN777  YYY$$$'''777SSS WWW  !!!AAA 555 JJJ###   $$$  888(((  \\\ www  kkk<<>>iii... %%%    %%%    BBB       ...  
      +++     ###111   EEE!!!   ttt  xxx111  TTT777---  JJJ&&&%%%  ///,,,  DDD^^^  000  AAA888BBB(((  111```MMMccc  $$$555111   :::111  ,,,$$$]]]000###  888===***fff   000    &&&   !!!  QQQJJJ///  ///###  $$$???OOO  ===            &&&                 %%%                   Release_v0.3/kernels/compiler_math.cl000066400000000000000000000026061223142177000200150ustar00rootroot00000000000000__kernel void compiler_math(__global float *dst, __global float *src) { int i = get_global_id(0); const float x = src[i]; switch (i) { case 0: dst[i] = cos(x); break; case 1: dst[i] = sin(x); break; case 2: dst[i] = log2(x); break; case 3: dst[i] = sqrt(x); break; case 4: dst[i] = rsqrt(x); break; case 5: dst[i] = native_recip(x); break; case 6: dst[i] = tan(x); break; case 7: dst[i] = cbrt(x); break; case 8: dst[i] = ceil(x); break; case 9: dst[i] = cospi(x); break; case 10: dst[i] = exp2(x); break; case 11: dst[i] = exp10(x); break; case 12: dst[i] = expm1(x); break; case 13: dst[i] = log1p(x); break; case 14: dst[i] = logb(x); break; case 15: dst[i] = sinpi(x); break; case 16: dst[i] = tanpi(x); break; case 17: dst[i] = rint(x); break; case 18: dst[i] = sinh(x); break; case 19: dst[i] = cosh(x); break; case 20: dst[i] = tanh(x); break; case 21: dst[i] = asinh(x); break; case 22: dst[i] = acosh(x); break; case 23: dst[i] = atanh(x); break; case 24: dst[i] = asin(x); break; case 25: dst[i] = acos(x); break; case 26: dst[i] = atan(x); break; case 27: dst[i] = asinpi(x); break; case 28: dst[i] = acospi(x); break; case 29: dst[i] = atanpi(x); break; case 30: dst[i] = erf(x); break; case 31: dst[i] = nan((uint)x); break; default: dst[i] = 1.f; break; }; } Release_v0.3/kernels/compiler_math_2op.cl000066400000000000000000000013041223142177000205670ustar00rootroot00000000000000kernel void compiler_math_2op(global float *dst, global float *src1, global float *src2) { int i = get_global_id(0); const float x = src1[i], y = src2[i]; float z; switch (i) { case 0: dst[i] = native_divide(x, y); 
break; case 1: dst[i] = fdim(x, y); break; case 2: dst[i] = fract(x, &z); break; case 3: dst[i] = hypot(x, y); break; case 4: dst[i] = ldexp(x, y); break; case 5: dst[i] = pown(x, (int)y); break; case 6: dst[i] = remainder(x, y); break; case 7: dst[i] = rootn(x, (int)(y+1)); break; case 8: dst[i] = copysign(x, y); break; case 9: dst[i] = maxmag(x, y); break; case 10: dst[i] = minmag(x, y); break; default: dst[i] = 1.f; break; }; } Release_v0.3/kernels/compiler_math_3op.cl000066400000000000000000000005141223142177000205720ustar00rootroot00000000000000kernel void compiler_math_3op(global float *dst, global float *src1, global float *src2, global float *src3) { int i = get_global_id(0); const float x = src1[i], y = src2[i], z = src3[i]; switch (i) { case 0: dst[i] = mad(x, y, z); break; case 1: dst[i] = fma(x, y, z); break; default: dst[i] = 1.f; break; }; } Release_v0.3/kernels/compiler_math_builtin.cl000066400000000000000000000050501223142177000215370ustar00rootroot00000000000000/* OpenCL 1.1 Math Built-in Functions (section 6.11.2) */ __kernel void compiler_array0(__global float *src, __global float *dst) { int p = get_global_id(0); dst[p] = acos(src[p]); dst[p+1] = acosh(src[p]); dst[p+2] = acospi(src[p]); dst[p+3] = asin(src[p]); dst[p+4] = asinh(src[p]); dst[p+5] = asinpi(src[p]); dst[p+6] = atan(src[p]); dst[p+7] = atan2(src[p], src[p+1]); dst[p+8] = atanh(src[p]); dst[p+9] = atanpi(src[p]); dst[p+10] = atan2pi(src[p], src[p+1]); dst[p+11] = cbrt(src[p]); dst[p+12] = ceil(src[p]); dst[p+13] = copysign(src[p], src[p+1]); dst[p+14] = cos(src[p]); dst[p+15] = cosh(src[p]); dst[p+16] = cospi(src[p]); dst[p+17] = half_divide(src[p], src[p+1]); dst[p+18] = native_divide(src[p], src[p+1]); dst[p+19] = erfc(src[p]); dst[p+20] = erf(src[p]); dst[p+21] = exp(src[p]); dst[p+22] = exp2(src[p]); dst[p+23] = exp10(src[p]); dst[p+24] = expm1(src[p]); dst[p+25] = fabs(src[p]); dst[p+26] = fdim(src[p], src[p+1]); dst[p+27] = floor(src[p]); dst[p+28] = fma(src[p], src[p+1], 
src[p+2]); dst[p+29] = fmax(src[p], src[p+1]); dst[p+30] = fmin(src[p]); dst[p+31] = fmod(src[p], src[p+1]); __local float iptr[4]; dst[p+32] = fract(src[p], iptr); __private int exps[4]; dst[p+33] = frexp(src[p], exps); dst[p+34] = hypot(src[p], src[p+1]); dst[p+35] = (float)ilogb(src[p]); dst[p+36] = ldexp(src[p], 10); dst[p+37] = lgamma(src[p]); __local int signp[4]; dst[p+38] = lgamma_r(src[p], signp); dst[p+39] = log(src[p]); dst[p+40] = log2(src[p]); dst[p+41] = log10(src[p]); dst[p+42] = log1p(src[p]); dst[p+43] = logb(src[p]); dst[p+44] = mad(src[p], src[p+1], src[p+2]); dst[p+45] = maxmag(src[p], src[p+1]); dst[p+46] = minmag(src[p], src[p+1]); dst[p+47] = modf(src[p], iptr); dst[p+48] = nan((ulong)src[p]); dst[p+49] = nextafter(src[p], src[p+1]); dst[p+50] = pow(src[p], src[p+1]); dst[p+51] = pown(src[p], (int)src[p+1]); dst[p+52] = powr(src[p], src[p+1]); dst[p+53] = half_recip((half)src[p]) + native_recip(src[p]); dst[p+54] = remainder(src[p], src[p+1]); __private int quo[4]; dst[p+55] = remquo(src[p], quo); dst[p+56] = rint(src[p]); dst[p+57] = rootn(src[p], 10); dst[p+58] = round(src[p]); dst[p+59] = rsqrt(src[p]); dst[p+60] = sin(src[p]); __local float cosval; dst[p+61] = sincos(src[p], &cosval); dst[p+62] = sinh(src[p]); dst[p+63] = sinpi(src[p]); dst[p+64] = sqrt(src[p]); dst[p+65] = tan(src[p]); dst[p+66] = tanh(src[p]); dst[p+67] = tanpi(src[p]); dst[p+68] = tgamma(src[p]); dst[p+69] = trunc(src[p]); } Release_v0.3/kernels/compiler_math_constants.cl000066400000000000000000000006271223142177000221120ustar00rootroot00000000000000/* test case for OpenCL 1.1 Math Constants (section 6.11.2) */ __kernel void compiler_math_constants() { float f; f = MAXFLOAT; f = HUGE_VALF; f = HUGE_VAL; f = INFINITY; f = NAN; f = M_E_F; f = M_LOG2E_F; f = M_LOG10E_F; f = M_LN2_F; f = M_LN10_F; f = M_PI_F; f = M_PI_2_F; f = M_PI_4_F; f = M_1_PI_F; f = M_2_PI_F; f = M_2_SQRTPI_F; f = M_SQRT2_F; f = M_SQRT1_2_F; } 
Release_v0.3/kernels/compiler_mem_fence.cl000066400000000000000000000005131223142177000207750ustar00rootroot00000000000000kernel void compiler_mem_fence() { barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE); mem_fence(CLK_LOCAL_MEM_FENCE); mem_fence(CLK_GLOBAL_MEM_FENCE); read_mem_fence(CLK_LOCAL_MEM_FENCE); read_mem_fence(CLK_GLOBAL_MEM_FENCE); write_mem_fence(CLK_LOCAL_MEM_FENCE); write_mem_fence(CLK_GLOBAL_MEM_FENCE); } Release_v0.3/kernels/compiler_menger_sponge.cl000066400000000000000000000117721223142177000217200ustar00rootroot00000000000000// See http://www.iquilezles.org/articles/menger/menger.htm for the // full explanation of how this was done typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod #define time 1.f // fmod is not like glsl mod! __attribute__((always_inline, overloadable)) float glsl_mod(float x,float y) { return x-y*floor(x/y); } __attribute__((always_inline, overloadable)) float2 glsl_mod(float2 a,float2 b) { return (float2)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y)); } __attribute__((always_inline, overloadable)) float3 glsl_mod(float3 a,float3 b) { return (float3)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y), glsl_mod(a.z,b.z)); } inline vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } inline uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) __attribute__((always_inline)) float maxcomp(vec3 p) { return max(p.x,max(p.y,p.z));} __attribute__((always_inline)) float sdBox(vec3 p, vec3 b) { vec3 di = fabs(p) - b; float mc = maxcomp(di); return min(mc,length(max(di,0.0f))); } __attribute__((always_inline)) vec4 map(vec3 p) { float d = 
sdBox(p,(vec3)(1.0f)); float4 res = (vec4)(d,1.f,0.f,0.f); float s = 1.0f; for( int m=0; m<3; m++ ) { vec3 a = glsl_mod(p*s, 2.0f)-1.0f; s *= 3.0f; float rx = fabs(1.0f - 3.0f*fabs(a.x)); float ry = fabs(1.0f - 3.0f*fabs(a.y)); float rz = fabs(1.0f - 3.0f*fabs(a.z)); float da = max(rx,ry); float db = max(ry,rz); float dc = max(rz,rx); float c = (min(da,min(db,dc))-1.0f)/s; if (c > d) { d = c; res = (vec4)(d, 0.2f*da*db*dc, (1.0f+(float)(m))/4.0f, 0.0f); } } return (vec4)(res.x,res.y,res.z,0.f); } // GLSL ES doesn't seem to like loops with conditional break/return... #if 1 __attribute__((always_inline)) vec4 intersect( vec3 ro, vec3 rd ) { float t = 0.0f; for(int i=0;i<64;i++) { vec4 h = map(ro + rd*t); if( h.x<0.002f ) return (vec4)(t,h.yzw); t += h.x; } return (vec4)(-1.0f); } #else __attribute__((always_inline)) vec4 intersect( vec3 ro, vec3 rd ) { float t = 0.0f; vec4 res = (vec4)(-1.0f); for(int i=0;i<64;i++) { vec4 h = map(ro + rd*t); if (h.x<0.002f) { if(res.x<0.0f) res = (vec4)(t,h.yzw); } t += h.x; } return res; } #endif __attribute__((always_inline)) vec3 calcNormal(vec3 pos) { vec3 epsxyy = (vec3)(.001f,0.0f,0.0f); vec3 epsyxy = (vec3)(0.0f,.001f,0.0f); vec3 epsyyx = (vec3)(0.0f,0.0f,.001f); vec3 nor; nor.x = map(pos+epsxyy).x - map(pos-epsxyy).x; nor.y = map(pos+epsyxy).x - map(pos-epsyxy).x; nor.z = map(pos+epsyyx).x - map(pos-epsyyx).x; return normalize(nor); } __kernel void compiler_menger_sponge(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy); // light vec3 light = normalize((vec3)(1.0f,0.8f,-0.6f)); float ctime = time; // camera vec3 ro = 1.1f*(vec3)(2.5f*cos(0.5f*ctime),1.5f*cos(ctime*.23f),2.5f*sin(0.5f*ctime)); vec3 ww = normalize((vec3)(0.0f) - ro); vec3 uu = normalize(cross( (vec3)(0.0f,1.0f,0.0f), ww )); vec3 vv = normalize(cross(ww,uu)); vec3 rd = normalize( p.x*uu + p.y*vv + 1.5f*ww ); vec3 col = (vec3)(0.0f); vec4 tmat = 
intersect(ro,rd); #if 0 if( tmat.x>0.0 ) col = (vec3)( 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z), 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z), 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) ); #else if( tmat.x>0.0f ) { vec3 pos = ro + tmat.x*rd; vec3 nor = calcNormal(pos); float dif1 = max(0.4f + 0.6f*dot(nor,light),0.0f); float dif2 = max(0.4f + 0.6f*dot(nor,(vec3)(-light.x,light.y,-light.z)),0.0f); // shadow float ldis = 4.0f; vec4 shadow = intersect( pos + light*ldis, -light ); if( shadow.x>0.0f && shadow.x<(ldis-0.01f) ) dif1=0.0f; float ao = tmat.y; col = 1.0f*ao*(vec3) (0.2f,0.2f,0.2f); col += 2.0f*(0.5f+0.5f*ao)*dif1*(vec3)(1.0f,0.97f,0.85f); col += 0.2f*(0.5f+0.5f*ao)*dif2*(vec3)(1.0f,0.97f,0.85f); col += 1.0f*(0.5f+0.5f*ao)*(0.5f+0.5f*nor.y)*(vec3)(0.1f,0.15f,0.2f); // gamma lighting col = col*0.5f+0.5f*sqrt(col)*1.2f; vec3 matcol = (vec3)( 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z), 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z), 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) ); col *= matcol; col *= 1.5f*exp(-0.5f*tmat.x); } #endif vec4 gl_FragColor = (vec4)(col,1.0f); OUTPUT; } Release_v0.3/kernels/compiler_menger_sponge_no_shadow.cl000066400000000000000000000065211223142177000237550ustar00rootroot00000000000000// See http://www.iquilezles.org/articles/menger/menger.htm for the // full explanation of how this was done typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod #define time 1.f // fmod is not like glsl mod! 
inline __attribute__((always_inline, overloadable)) float glsl_mod(float x,float y) { return x-y*floor(x/y); } inline __attribute__((always_inline, overloadable)) float2 glsl_mod(float2 a,float2 b) { return (float2)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y)); } inline __attribute__((always_inline, overloadable)) float3 glsl_mod(float3 a,float3 b) { return (float3)(glsl_mod(a.x,b.x), glsl_mod(a.y,b.y), glsl_mod(a.z,b.z)); } inline vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } inline uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) inline __attribute__((always_inline)) float maxcomp(vec3 p) { return max(p.x,max(p.y,p.z));} inline __attribute__((always_inline)) float sdBox(vec3 p, vec3 b) { vec3 di = fabs(p) - b; float mc = maxcomp(di); return min(mc,length(max(di,0.0f))); } inline __attribute__((always_inline)) vec4 map(vec3 p) { float d = sdBox(p,(vec3)(1.0f)); float4 res = (vec4)(d,1.f,0.f,0.f); float s = 1.0f; for( int m=0; m<3; m++ ) { vec3 a = glsl_mod(p*s, 2.0f)-1.0f; s *= 3.0f; float rx = fabs(1.0f - 3.0f*fabs(a.x)); float ry = fabs(1.0f - 3.0f*fabs(a.y)); float rz = fabs(1.0f - 3.0f*fabs(a.z)); float da = max(rx,ry); float db = max(ry,rz); float dc = max(rz,rx); float c = (min(da,min(db,dc))-1.0f)/s; if (c > d) { d = c; res = (vec4)(d, 0.2f*da*db*dc, (1.0f+(float)(m))/4.0f, 0.0f); } } return (vec4)(res.x,res.y,res.z,0.f); } // GLSL ES doesn't seem to like loops with conditional break/return... 
inline __attribute__((always_inline)) vec4 intersect( vec3 ro, vec3 rd ) { float t = 0.0f; for(int i=0;i<64;i++) { vec4 h = map(ro + rd*t); if( h.x<0.002f ) return (vec4)(t,h.yzw); t += h.x; } return (vec4)(-1.0f); } __kernel void compiler_menger_sponge_no_shadow(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); vec2 p=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy); // light vec3 light = normalize((vec3)(1.0f,0.8f,-0.6f)); float ctime = time; // camera vec3 ro = 1.1f*(vec3)(2.5f*cos(0.5f*ctime),1.5f*cos(ctime*.23f),2.5f*sin(0.5f*ctime)); vec3 ww = normalize((vec3)(0.0f) - ro); vec3 uu = normalize(cross( (vec3)(0.0f,1.0f,0.0f), ww )); vec3 vv = normalize(cross(ww,uu)); vec3 rd = normalize( p.x*uu + p.y*vv + 1.5f*ww ); vec3 col = (vec3)(0.0f); vec4 tmat = intersect(ro,rd); if( tmat.x>0.0f ) col = (vec3)( 0.6f+0.4f*cos(5.0f+6.2831f*tmat.z), 0.6f+0.4f*cos(5.4f+6.2831f*tmat.z), 0.6f+0.4f*cos(5.7f+6.2831f*tmat.z) ); vec4 gl_FragColor = (vec4)(col,1.0f); OUTPUT; } 
Release_v0.3/kernels/compiler_menger_sponge_no_shadow_ref.bmp000066400000000000000000006000661223142177000247750ustar00rootroot00000000000000BM66(ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7CX|ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX
|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵCX|CX|ٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵCX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|CX|ٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵ`J7ٵٵ`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J
7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵCX|CX|CX|`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵ`J7ٵٵ`J7ٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵCX|CX|CX|CX|CX|ٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵ`J7ٵٵ`J7ٵٵ`J7ٵٵٵٵ`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵ`J7CX|ٵٵٵٵٵ`J7ٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|C
X|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵCX|ٵ`J7ٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵCX|ٵ`J7ٵٵٵٵ`J7ٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵ`J7ٵCX|CX|`J7ٵٵٵٵ`J7`J7`J7ٵٵٵCX|CX|CX|CX|`J7CX|ٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵ`J7ٵCX|CX|ٵٵٵٵٵ`J7`J7ٵٵٵCX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵ`J7ٵٵٵCX|CX|ٵٵٵٵٵ`J7ٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵ`J7ٵٵ`J7ٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|`J7`J7CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵ`J7ٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ
ٵٵٵٵCX|ٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|`J7`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|CX|CX|CX|ٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7CX|CX|CX|CX|CX|ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵCX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|CX|CX|CX|ٵ`J7ٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵCX|CX|CX|CX|ٵٵٵٵٵCX|ٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7CX|CX|`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|CX|CX|ٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|`J7`J7CX|CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵCX|CX|ٵٵٵٵ`J7ٵ`J7ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|`J7CX|CX|CX|CX|CX|CX|`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵ`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵ`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵ`J7`J7`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵ`J7ٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ
ٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵCX|ٵ`J7ٵٵٵٵ`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵ`J7ٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵ`J7ٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵCX|ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵ`J7`J7ٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵ`J7ٵٵٵٵٵٵٵCX|ٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7`J7ٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7ٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵ`J7ٵ`J7CX|ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7ٵٵٵٵ`J7`J7`J7CX|`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵ`J7ٵ`J7`J7ٵCX|ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵ`J7`J7`J7ٵٵٵٵٵ`J7`J7`J7`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7ٵٵٵٵٵٵٵ`J7`J7`J7`J7CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ
ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7CX|CX|CX|CX|CX|CX|`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7CX|CX|CX|CX|CX|CX|`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵ`J7ٵٵ`J7ٵٵٵٵٵ`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵCX|CX|CX|CX|`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵ`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵ`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7
`J7`J7ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|ٵٵٵٵ`J7ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7`J7`J7CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵCX|CX|ٵٵٵ`J7ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵCX|CX|ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵCX|CX|ٵٵٵ`J7ٵٵ`J7ٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵCX|CX|ٵٵٵ`J7ٵٵٵ`J7ٵٵ`J7ٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|
CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7CX|ٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵ`J7ٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵ`J7ٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵCX|CX|CX|CX|`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵCX|CX|CX|CX|`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|
CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵ`J7ٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7ٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵ`J7ٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7`J7ٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|`J7`J7CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7`J7`J7`J7ٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7CX|ٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵ`J7ٵٵٵٵٵ`J7ٵٵ`J7ٵCX|`J7ٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵ`J7ٵٵٵٵٵ`J7ٵٵ`J7ٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵ`J7ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵCX|CX|ٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`
J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵCX|CX|CX|ٵ`J7ٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵCX|CX|ٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|`J7`J7CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7`J7CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵCX|ٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7`J7CX|CX|CX|CX|CX|`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵCX|ٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|`J7ٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7`J7CX|CX|CX|CX|CX|`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵCX|CX|ٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7`J7CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|C
X|CX|`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵ`J7ٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵ`J7ٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|CX|ٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵ`J7ٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵ`J7ٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵCX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ
ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|`J7`J7`J7`J7`J7CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵCX|CX|ٵ`J7ٵٵCX|ٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵCX|ٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵ`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵ
ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵ`J7CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵCX|ٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵCX|ٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ
ٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵCX|ٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|`J7ٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ
ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵCX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ
ٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|ٵٵٵٵٵٵٵ`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7ٵٵٵٵCX|CX|CX|CX|CX|CX|CX|CX|CX|ٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵ`J7`J7`J7`J7ٵٵٵٵٵٵٵٵٵٵٵٵٵٵٵRelease_v0.3/kernels/compiler_menger_sponge_ref.bmp000066400000000000000000006000661223142177000227340ustar00rootroot00000000000000BM66(ljZmnmmno0+"lmnnopC?4lmnnoppqm/*"lmmnoopqqrr
/+"llmnnoppqrrss0,#klmmnoo,$( qrsstt1,$/*"kllmnno-%2)/&*"rsttuu2-$/+"~kklmmnop +#0'1(,$stuuvw3.%0+#.)!xwdkllmnnopq  -%sttuvvwxHC81,$/*"<:1~kklmmnoppqr sttuuvwwxyk2-$60&~jkllmnnopqqrsstuuvvwxxyzz3.% .)!}j~kklmmnoppqrrsttuvvwxxyyz{{4/%1,$/*"}i~jkllm%nopqqrsstuuvwwxyyzz{||5/&2-$,'-( ~|i}j~jklm1(+#&ppqrrsttuvvwxxyzz{||}}60'3.% .)!~|h}i~jkkl( ,$1(.%)!qqrsstuuvwwxyyz{{|}}~~61'4/%1,$.*"}{h~|i}j~jklmm )!.%1'+#rrsttuvvwxxyzz1(,$|}~~MH;5/&2-$/+"-( }{g~|h}i}j~kklmnn   qrsstuuvwwxyyz2)7-!4*.&}~|60'3.%0+#.)!|zg}{h~|i}i~jkllmnoo qrrsttu(8&6wxxyzz+#0'5+ 6,!1'~61'4/%1,#.*",' {yf|zg}{h~|i}j~jklmmnoppqrsstu+=*;(9'7xyyz{|  2(8."72( 2-$/*"-( jhX|zg}{g~|h}i~j~kklmnnopqrrstt"/C!.@ ,>*;)9'7yz{{|}  ~82) 3.%0+#-)!{yf|zg}{h~|i}i~j(!llmnoppqrsst%4I$2G#0D!.A -?+<):(8z{||}~~93)61'4/%1,#.*!,'zxf{yf|zg}{h~|i}j+#1(+#mmnopqqrst"/B#0D$2G%3H#1E"/B!-@,>*;(9{|}}~:4*72( 2-$/*",( zxe{yf|zg}{g~|h~}i}j ( -$/&noopqrrs*;,>!-@"/C$1E%3H$2F#0D!.A ,>+<):|}~~QL?82( 3.%0+# *&ywdzxe{yf|zg}{h~|h}i~jk   noppqrs&6(8):+< ,>!.A#0D$2F%3H#1E"/B -?+=*;}~{93)61'4.% .*!+'xvdywdzxe{yf|zg}{h~|i}i~jkl mnopqqrs '6(8*;+= -?"/B#1E%3H!-?  ,>*<:4*72(5/&2-$/*" wucxvdywezxf{yf|zg}{h~|i}j~jklmnnopqrrst   '7(9*;,>!.@     -?+=):;5*82(50'3.%0+# *&vtbwucxvdzxe{yf|zg}{g~|h}i~jkllmnoppqrsstu     ):*< ,>    .&(! 
,>*;2(;5+93)61'4.% .)!+'utavubxvcywdzxe{yf,$&~|h}i~jklmmnopqqrstuuv       +=   /')" ,?;0$<1$:/#4*<6,:4*72(4/& /*",( )%\ZLvtbwucxvcywdzxe(!)"/'*"}i~jklmnnopqrrstuvvw           -%/&3*9."=2%7,!1(=7,:5*82(50'  -)!*&usavtbwucxvdywezxe{yf  +#.%~jklmnoppqrsstuvwwx          v  6,!<1$9."3*VPB;5+93)61'.)!+'tr`usavtbwucxvdywezxf{yf|zg  }j~kllmnopqqrstu+#vwxxy           <6+ 71(4/& /*",( )%sq_tr`usavtbwucxvdyxe{yf|zg}{g~|h}i~jklmmnopqrrst1(4*.&wxyyz      =7, 82(50&  -( *&rp_sq_tr`usavtbwucxwdzxe{yf|zg}{h~|h}i~jklmnnopqrsst -%3*2(xyzz{    >8-;5+ 61'  .)!+'qo^rp_sq`tr`utawub!ywdzxe{yf|zg}{h~|i}i~jklmnoppqrstt~j    yz{{|  ~?8-<6+ 71(   ,' )%pn]qo^rp_sq`tsautb+#*"ywdzxe{yf|zg}{h~|i}j~jklmnopqqrstuu  vtbyyz{||}~=2%3).%?9.=7, 82(50&  -( 0+"om\pn]qo^rp_sr`tsavtb &,#zxe{yf|zg}{h~|i}j~kllmnopqrrstuvvwxyzz{|}}~1(7-!<1$6, 0'@:.>7-;5+93)60'  .)! ($nl[om\pn]qo^rq_sr`usavtbrp^  hfV{yf|zg}|h}i~jklmmnopqrsstuvwwxyz{{|}~~  3*9/":/#3)[VG>8-<6+94)71(  ,' )%mkZnl[om\pn]qp^rq_tr`usavtbwuc XVIzxf{zf}{g~|h}i~jklmnno{+#rsttuvwxxyz{||}~   <1$?9.=7,:4*82(50&  -( '#75-mkZnl[om\po]qp^sq_tr`usavtbwucxvdywezyf|zg}{g~|h}i$3#1lmno~-$2)/&stuuvwxyyz{|}}~  =2%3)@:.>7-;5+83)60'  -)!'#($ljYmkZnl[om\po]"sq_tr`usavtbwucxvdzxe{yf|zg}{h~|i(9'6%4$2mnop  -%4*tuvvwxyzz{|}~~A5'A4':/#5+ A:/>8-<6+94)71(4/& .*!+')$kiXljYmkZnl[om\&+#%tr`usavtbwucxvdzxe{yf|zg}{h -?+=):(8&6%4nopq   tuvwwxyz{||}~3)8-!=2%D7)>2%7-!B;/?9.=7,:4*72(5/& /*",( )%&#ihWkiXljYmkZnl[on\  'zxfusavtbwucywdzxe{yf|zg*< ,>"/B ,?+<):'7+=opqr stuvwxxyz{|}}~8.".%  :/#A5'@4':/#B<0@:.=7,;5+83)60' 0+#-)!*&'#hgVihWkiXljYmkZnl[on\qo]   usavtbwvcywdzxe{yf%5'7)9+< ,>!.A ,>*;(9opqrsttuvwxyyz{|}~~2)8-!7-!1(    C7)C<1A:/C<0<6+93)61'4/% .)!+'($geUhgVihWkiXljYmk[nl[on\qo]rp^ tr`usavtbwvcywdzxe!."0$2%4'7)9+< ,?!-@+='7pqrstuuvwxyzz{|}~~ .%4*:/#5+    zubB;/ =6,:4*72(5/& /*",( )%&"fdTgeUhgVihWkiXljYmk[nm[on\qo]rp^sq_tr`usavubxvcywdzxe{yf "0$2%5'7):+< -? 
qrstuvvwxyz{{|}~    :/#B<0 =7,;5+82) 3.%0+#-)!*&,'ecSfdTgeUhgVihWkiX!mk[nm\on\qo]rp^sq_tr`usavubxvcywdzxe{yf|zg   $2%5'7   rstuvvwxyz{||}~   C<1A:/ <6+93) 4.%1,# +'($%!cbRecSfdTgeUhgVihW!&&nm\on\qo]rp^sq_tr`usavubxvcywdzxe{yf|zg       '7  stuvwwxyz{|}}~ :/#4+ D=1A;/ <6,:4*72(4/&2-$ ,( )%&"baQcbRdcSfdTgeUhfVihW  nm\on\qo]rp^sq_tr`usavubxvcywdzxe{yf|zg~|h         tuvwxxyz|}~~;0#A5'=2%7-!E>2B<0@9.=7,;5*82)2,#3-%0+"-( *&/-&b`QcbRdcSfdTgeUhfVihWkiX  nm[on\qo]rp^sq_tr`utawubxvcywdzxe{yf|zg~|h}i          tuvwxyy;0$1(|}~ 6,!=2%A5';0#¿E>2C<0A:/>8-;6+93) 3.%1,#)%+&($%!a_Pb`QcbRdcSfdTgeUhfVihWjiXljYmkZnl[on\qo]rp^sq_tr`utawubxvcywdzxe' |{g~|h}i~j        lstuvwxy*"/&5+ 5+ }~   >3%>3&¿ÿa[KD=1A;/?9-<6,:4*71(4/&1,$/*",' )%&"_^Oa_Pb`QcbRdcSfdUgeUhfVihWjiXljYmkZnl[on\qo]rp^sq_tr`utawubxvcywd'-$,#~|h}i~jk      rsttuvwxyz  0'6,!~    ¿¿pjXE>2B;0@9.=7,;5*82(50&2-$/+"-( *%'#$ ^\N_^O`_Pb`QcaR!&geUhfVihWjiXljYmkZnl[on\qo]rp^sq_tr`utawubxvcywd ' -$}i~jkl    qrsttuvwxyz{   ¿¿£¤E>2C<0@:/>8-;5+93)61'3.%0+#-)!+&($$!][M^\N_^O`_Pb`QcaR  !hfVigWjiXljYmkZnl[on\qo]rp^sq_tr`usawubxvcywdzxe  }i~jklkhX  pqrstuuvwxyz{| }~$2GA5';0$<2%¤äåĥF?3D=1A;/?8-<6,94*71(4/&1,$.*!+'($%"L][M^\N_^O`_Pb`QcaRdcS  hfVigWjiXkjYmkZnl[on\po]rp^sq_tr`usavubxvcywdzxe{yf ~|h}i~jklm opqrstuvvwxyz{|}~~^nw6,!1('6L&4J%3H¤C7)B6(J<,D7)>2%9."£¤äĥĥŦG@3D>1B;0?9.=7,:4*82(50&2-$/*",( )%&"# ZXK[ZL\[M^\N_]O`_Pa`QcaRdbSedTfeUhfVigWjiXkjYmkZnl[om\po]rp^sq_tr`usavubxvcywdzxe{yf}{g~|h}i~jklmnopqrst,$vwxyz{|}~~`qz_py^ox8."@4&;0#5+ -?Y*:R)9P'7M&5K%3I$2F 9/"@4&E8*H;+A5';0#¤äåĥŦŦƧH@4E>2C<0@:.>8-;5+83)60'3.%0+#-)!*&'#($YWIZXJ[ZL\[M^\N`_Pa`QcaRdbSedTfeUhfVigWjhXkjY+nl[om\po]rp^sq_+#%vubxvcywdzxe{yf|{g~|h}i~jklmnopqrs-$/'6, 0'wxyz{|}~bt}ar|`qz_py^ox 4*:/#@4&9."0B].@Z,>W+2%åĥĦŦƧƧǨHA4F?3C=1A;/?8-<6+94)71'4/%1,#.)! 
($%!"ZXJ[YK\[L"caRa`QcaRdbSecTfeUhfVigWjhX"0!.,om\po]rp^ #( (!xvcywdzxe{yf|{g~|h}i~jklmnopqrst  /&6, xyz{|}~fxevcubt}as|`r{_pz^ox   ;0$=2%Ƨ.@Z0B]0C^/A[-?X,W.@[0C^0B].@Z,=V+;T)9Q(7N'6L%4I¿(    åĥŦŦƧǧǨȨȩɩG@4 C<0@:.>7- 83)50' 0+#-( '#$ \ZL]\M  baRdbSecTfdUgfV!/#1%4'7%5$2"0qp^sq_tr`  wvcywdzxe{yf|zg~|h}i~jklmnopqrstum xxyz{|}~k~j}i|hzgyewdvcu~bt}ar|`qz %3H&5J'7M)8O*:R,8-<6+93)61'4.%1,#.)!  %!"\ZL]\M^]N a_PbaQcbRecTfdU(*,!.#1%4'6%5qo^rq_tr`usavtbwvcywdzxe{yf|zg~|h}i~jklmnopqrstuvwxyyz{|}nmlk~i|h{gyfxewcubt}`q{(#0D$2G%4I&5K(7M)9P*;S,=V-?X/A\0B].@Z  )9Q(7N'5K¿£¤äĥĥŦŦƧǨǨȩȩɪʪʪ˫˫IA5F?3 B;/ =6,5/%72(4/&1,#/*",' )%&"#][M^]N_^Oa_Pb`QcbRecSfdT  (*,!."0$3&6rq_tr`usavtbwucywdzxe{yf|zg}|h}i~jklmn-%pqrstuvwxyyz{|wrponlk~j}i{gzfxew     %4I&5K(7N)9P+;S,=V!   ;1$*:R(8O'6L¤äåĥŦŦƧǧǨȨȩɩɪʪ˫˫̬̬JB5G@4 B<0=7, 82)50'2-$0+"-( *% $  ^\N_^O`_Pb`QcbRdcSfdTgeU     + -  tr`usavtbwucxwdzxe{yf|zg}{h}i~jklm' -$3)qrstuvwxyzz{wutrqpnmlj}i|hzgy       '6K(7N)9Q    <1%5+ )9P(7N£¤äĥĦŦƧƧǨȨȩɩɪʪʫ˫ˬ̬ͭͭJC6HA4F?2C<1A:/D=0<6+93)61'3.%0,#.)!+& $!!^\N_^O`_Pb`QcaRdcSedTgeUhfV         ,usavtbwucxvdzxe{yf|zg}{h}i~jklm  ,$rstuvwxyz{zywvusrponlk~j|h{gyfx  4+ #    (8O*:R  =2&=2%5,!,=V(8O¤äåĥŦŦƧǨǨȩȩɪʪʫ˫ˬ̬̬ͭͭήKC6IA5F?3D=1A;/<6,:4*71(4/&1,$.*"+' %""_]O`_Pa`QcaRdcSedTgeUhfV^\N        "0vtbwucxvdzxe{yf|zg}{h~|i~jklmn  rstuvwxyzvxzzxwutrqpnmkj}i|hzfydu ,$5,!%       <1%>3&.@[*;S)9PäĥĦŦƧƧǨȨȩɩɪʪ˫˫̬̬ͭͭήήϯibQIB5G@3E>2B<0=7,:5*82(50&2-$/*",(  #  `^Pa`QcaRdbSedTfeUhfVigW         vtbwucxvdzxe{yf|zg}{h~|i~jklmno qrstuvwxtqrtvwyywvtsrpomlk~i|h{gyfx cu~bs}(        -?Y/A[->X+2C<0;5+93)60'3.%0+#-)!*& $  a`QbaRdbSecT"hfVigWjhX        vtbwucxvdywe{yf|zg}{h%~jklmnopqrstuvwi|k~lnprsuwyxw trqonmk~j}i{gzfxewdubt}+      .?Y,=V*;SŦƧƧǨȨȩɩʪʫ˫ˬ̬̬ͭέήϮϯЯаѰѱKC6HA4F?3D=1<6+94*71(4/%1,# +' a_PbaRdbS &igWjhXkjY      tr`utawubxvdywe{yf|zg*"0'~kklmnopqrstuvdvfxgzi{j}lnoqstdu~   sqpomlj}i|hzfyewdvct~9/# ,      +2:4*72(5/&2-$ ,( # baQcbRecT  ljZjhXkiY     sq_tr`usawubxvcywezxf|zg  )!~klmnopqrstu_pzar{bt}duewgyh{j}kmopr    3*-%rqonlk~j|h{gyfxdvcu~B5(D7)=1%*      
ƧƧǨȨȩɪʪʫ˫ˬ̬ͭͭήD8)?3&ϯЯѰѰұұӲӲLE7JB5H@4E>2C<0;5+83) 3.%0+#-( $ !cbRdcSSQD hgWjhXkiYljZ   qp^sq_tr`usavubxvcywdzxe{zf}{h   lmnopqrscv[kt\mu]nw_py`q{bs|cu~evfxhzi|k~mnpr   4+ -%,$qpnmkj}i{hzfxewdu 6+ ;0$B5(B6((       ĦŦƧǨǨȩɩɪʪ˫˫̬̬ͭE8*M>.N@/H:+A5'аѰѱұҲӲӲԳME8KC6HA4F?3C=1   93) 4.%1,# +& %!!caRdcSfdTgeUhfVihWkiYljZml[ljZpn]qo^rq_tr`usavtbwvcywdzxe{yf}{g~|h ~jkmnopqrUemVfnXgpYiqZjs[lu]mv^ox`qzar|ct~dvfxgzi{j}lmoq ,$vusrponlk~i|hzgyewdv   D7)%      åĥŦŧƧǨȨȩɪʪʫ˫ˬ̬ͭͭ A5'G:+O@0K=-D7)PB1ұұӲӲԳԳմNF8KD6IB5G@3D=1   :4* 4/&!!/*",'  %""dbSedTgeUhfVihWjiXljYmk[nm\pn]qo^rp_tr`usa"wucywdzxe{yf|zg~|h}i~jklmnpqQ`hRaiTckUdlVenWgoXhqZjr[kt\mv^nw_pyar{bs}cuewgyhzi|k~mnprtvtsqpnmk~ h{gyfxev     $  äĥĦŦƧǧǨȩɩɪʪ˫˫̬̬ͭέή   J<,N@/G:+A5'ӲӲԳԳճմִlfTLD7G@4E>2    82(50& !!/+"-( )% #  edTfeUhfVigWjiXljYmkZnm[pn]qo^rp_sr`'-%zyfxwdzxe{yf|zg~|h}i~jklmnop$),Q_gRahSbjTckUemWfnXgpYir   _ox`qzas|ct~dvfxgzi|j}lnpqsusrp%    hzfxew   "   £¤åĥŦŦƧǨȨȩɩʪʫ˫ˬ̬ͭͭήϮϯ   K=-D8)ԲԳճմִֵ׵mME7HA4F?2    93) 3.%0+#-)!*& $!!jiXfeUgfVigWjhXkjYmkZnl[on\qo]rp_sq` &xvdzxe{yf|zg}{h}i~jklmnopq RaiTckUdl     0(^nw_pyar{bt}duewgyh{j}lmoprt        gyew £äåĥŦŧƧǨȩȩɪʪʫ˫̬̬ͭέήϮϯЯ   ȩԳԳմִֵ׵׵׶մMF8  F?3    :4*71(4/&!##.*!+' %!"fdUgfVigWjhXkiYlkZnl[om\po]rp^sq_tr`   yxe{yf|zg}{h~|i~jklmnopqr  !!Tck  +#/'' ]nw^ox`qzas|cu~dvfxhzi|k~lh{         fx¿¤äĥĦŦƧǨǨȩɩɪʪ˫ˬ̬ͭͭήϮϯЯаѰ  /'ӲԳԳմմִֵ׵׶ضطٷNF8LD7 G@3E>2  :5*82(50&"##/*",( )%&"# geVhgWjhXkiYlkZnl[om\pn]qp^sq_tr`usa xvcywe{yf|zg}{h~|i~jklmnopqrs  !! !! ,%XhqZjr[kt\mv^ox_pyar{bt}dvfxgy            +#¿¤äĥĦŦƧǨȨȩ>2%ʪʫ˫̬̬ͭέήϮϯЯѰѰұұӲԲԳճմִֵ׵׶ضضٷٷڸOG9LE7 HA4E>2   ;5+93)60'"$$"$$-( *&'#!hfVihWkiYljZvtaom\pn]qo^sq_tr`usavubxvcywdzxf|zg}{h~|i}jklmnopqrs).2  !! ""!""VfnXgpYirZks\lu]nw_py`q{bs}cu~              z¿£¤åĥŦŦƧǨG:+M?/H;,A5'˫˫̬ͭͭήήϯЯаѰұұӲӲԳԳմմֵ׵׵ضضٷٷڸڸ۹OG9ME8 IA4F?3D=1  <6+94)71'#%%#%%.)! ($ "hfVihWjiX #)!pn]qo^rq_tr`usavtbwvcywdzxe{yf}{g~|i}j~klmnopqrst*03  !! !! 
""!##"##XhqZjs[kt]mv^ox_pzar{bt}             ¿¿£äåĥŦƧǧ<1$A5'I;,M?.F9*̬ͭͭͭήϮϯЯѰѰұұӲԲԳճմִֵ׵׶ضطٷٷڸ۸۹ܹPH:NF8LD7IB5G@3D>2  :4):4*72($&&$&&$'',' )%&""igWjiXljY   qo^rp_sr`usavtbwucywdzxe{yf|zg~|h}i~jkmno2)/&rstu !   !! ""!""!##"$$#$%[kt\lu]nw_py`q{bs}            ¿¤äåĦŦƧǨǨ   I<,J<-ضͭέήϯЯаѰѱұӲӲԳԳմִֵ׵׶ضضٷٷڸ۸۹۹ܺܺpiVNG9LE7 H@4E>2C<0  ;5+83)50'%''%''-( *%&##  jhXkjYmkZ  qo^rp_sq`tsavtbwucxvdzxe{yf|zg}|h}i~jklmn +#1(stuv!" !    !! !! ""!##"#$"$$#%%[kt]mv^ox`qzas|       (9,? .A¿¤äĥĦŦƧǨȨȩ$    ;1%ήϮϯЯаѰұұӲԲԳճմִֵ׵ضضٷٷڸڸ۹۹ܹܺݺݻĥOG9ME8KC6HA4F?3C=1  <6+93)61'%((&((-)! '#$!!nm\kiYlkZ om\po]rp^sq_tr`utbwucxvdyxe{yf|zg}{h}i~jklmn   stuw!#!" !    !! !! ""!##!##"$$#%%#%%$&&]nw_py`q{      '7*; -@!.A¿¤äĥŦŦƧǨȩɩɪ"    wήϮϯаѰѱұӲӲԳԳմִֵ׵׶ضطٷٷڸ۸۹ܹܺݺݻ޻޻޼PH:NF8KD7IB5G@3D=1B;0 <6, 71(&()'))'))+'($%!"kiYljZml[om\pn]qp^sq_tr`usawubxvcywe{yf|zg}{h~|i~jklmno  stuv/59 "$!#!" !     !! "! ""  "$$#%%$&&$&'%''`qz     '8*< -@!.A¿¤åĥŦŧƧǨȩɩɪʫ  ͭέήϯЯаѰұұӲӲԳճմִֵ׵ضضٷٷڸڸ۹ܹܺݺݺݻ޻޼߼߼PH:NF9LD7JB5G@4E>2B<0 =7, 82(')*'**(**,( )%&"# ljYmk[nm\pn]qo^rq_tr`usavtbxvcywdzxe|zg}{h~|i}jklmnop rstuvw07: #% "$!#!"        "$$#%%#%&$&&%''&((    (9*= .Al¿C7)=2%¤åĥŦƧǧǨȩɩʪʫ˫̬̬ͭήϮϯЯѰѰұҲӲԳԳմִֵ׵׶ضطٷڷڸ۹۹ܹܺݺݻ޻޻߼߼QI;OG9ME7JC6HA4F?2C<1A:/>8-<6,93)(*+(++(++)+,*&'#$ mkZnl[pn]qo^rp_sr`usavtbwucywdzxe{yf}{g0'+#~klmnopqrstuvwx"%'!$& #% "$!#         "$$"$$#%%$&&$''%'(   -@hzi|k~:/#@4&F9*I;,B6(åĥŦƧǨǨȩɪʪʫ˫̬ͭͭήϮϯаѰѱұӲӲԳճմִֵ׵ضضٷٷڸ۸۹ܹܺݺݻ޻޻߼߼RI;OH:MF8KD6IA5F?3 A;/ <6, 71(),,),,)+,+'($%! mkZnl[on\qo]rp_sq`tsavtbwucxvdzxe{yf ( .%klnopqrstuvwxy#&("%'!$& #% "$!#      !! ""!##!##"$$#%%#%&$&&%''&))')*(**ewgyi{¿  @4'G9*H:+ĦŦƧǨȨȩɪʪ˫ˬ̬ͭͭήϯЯаѰұұӲԲԳճմֵ׵׶ضطٷڷڸ۹۹ܹܺݺݻ޻޼߼߼RJ2 @:.>7-;5+83)&))&((%((%''*%#  om\pn]qp^sq_tr`usawubxvcywe{yf|zg}{h   lmnopqrtuvwxyz:CH%(*$')#&("%'!$& #%"$!# !  !! !! ""!##"#$"$$#%%$&&$&'%''&((&))')*(*+¿     ƧǨȩɩɪʪ˫̬̬ͭήϮϯЯѰѱұӲӲԳճմִֵ׵ضضٷٷڸ۹۹ܺܺL>.SD2SD2L>.F9*QI;OG9ME8KC6HA4F?3 A;/>8-<6+93)%''$&'$&&#%&0+" $!! nm\pn]qo^rq_tr`usavtbxvcywdzxe|zg}{h~|i}jklmnopqrstuvwxy{7?C&)+%(*$')#&("%'!$& #%"$!" !   !! 
""!""!##"$$#%%#%&$&&%''%((&()'))¾¿£   ŦƧǨȩɩɪʫ˫̬̬ͭήϮϯаѰѱұӲӲԳճմֵ׵׶ضٷٷڸڸ۹ܹܺݺ  G:+N?/VF4PA0J<,RJ;PH:NF8KD7IB5G@3 B;0?9.=6,:4*#%&#%%"$%"$$"#$($%!" on\qo^rp_"&,$wucywdzxe{yf}{g~|h}j~klmnopqrstuvwxyz{9@E'+,&*+%(*$')#&("%'!$& #%"#!" !   !! !! ""!##"#$"$$#%%$&&$&'%'(&((¿¿¤ĥŦŦƧǨȩɩʪʫ˫̬ͭͭήϮϯаѰұұӲԳԳմִֵ׵ضضٷٷڸ۸۹ܹܺݺݻ!  PA0TD3M?.G9+SJ8-;5+93) ""!!!!  '#$ !qp^sq_tr`   ywe{yf|zg}{h}i~jklm+=):(8&6stuvw7-"-$z{|}*.0)-/(,.'+,&*+%(*$')#&("%'!$& #$"#!" !         !! ""!##"#$"$$#%%$&&¤äĥŦŧƧǨȩɩʪʫ˫̬ͭέήϯЯаѰұӲӲԳճմִ׵׶ضٷٷڸ۸۹ܹܺݺݻ޻߼߼3)     TK=RJ; MF8KD6IB5F?3 A;/?8-<6,94*  ($%!!qo^rq_tr`  xvcywdzxf|zg}{h~|i~jkl#0D"/B!-@+=*;rtuv)!.&5+ 4*{|}Yhq*.0*.0)-/(,.'+,&)+%(*$')#&("%'!$% #$"#!" !        !! ""!"#!##"$$#%%¤äĥŦŧǧǨȩɩʪ˫ˬ̬ͭέήϯЯѰѱұӲӲԳճմֵ׵׶ضٷٷڸ۹۹ܺݺݺ޻޻߼߼Ϯ   TL=RJ<  LD7IB5G@3E>2B;0@9.=7,:4*%""rp_sr`usavtbwucywdzxe{yf}{g~|i}j(8):+= -?"/B"0C!.@ ,>stuv   z|}~:BF)-/*./*.0)-/  &)+%(*$')#&("%'!$% #$"#!" !     !! !! ""!##"#$¤äĥŦŧǧǨȩɪʪ˫ˬ̬ͭήήϯЯѰѱұӲԲԳմִֵQB1H;,B6(ٷٷڸ۹ܹܺݺݻ޻޼߼ vn[SK<  LE7JC6HA4E>2C<0@:/>7-;5+&# sq_tsavtbwucxvdzxe{yf|zg~|h"0#2%4&6(8*;,>!.A#0D!.Atuvw   {|}~9AE),.)-/)-/*-/   &)+%(*$')#&("%'  #$"#!" !   !! ""äĥŦŧǧǨȩɪʪ˫ˬ̬ͭήϮϯЯѰѱұӲԳԳմ@4'F9*L>.TD3M?.G9+ڸڸ۹ܹܺݺݻ޻߼߼īTK.SC2K=-۸۹ܹܺݺ޻޻߼߼TL=RJ; NF8KD7IB5G@3D=1B;0?9.<6,:4*%!"tr`usavtbxvcywdzxf|zg}{h~|i       (8):uvwx z{|}~'*,'+-(+-(,-    (,.'+,&)+%(*     #$"#!" !  rŦŧǧǨȩɪʪ˫ˬ̬ͭήϮϯаѰұұӲԳճմִ׵   PA0۹ܺݺݻ޻޼߼UL=SJ2B<0@9. ;5*  &"# usavtbwucywdzxe{yf}{g~|i}j         uvxyz{|}~~&*+'*,'*,'+,(+-$(*  )-.(,.'*,&)+      #$"#!" !   ŦŧǧǨȩɪʪ˫ˬ̬ͭήϮϯаѰұҲӲԳճմֵ׵׶     ܺݺݻ޻߼߼UM>SKTK=RJ;PH:MF8KD6IA5F?3D=1A;/ <6+$!!usawubxvdywe{yf|zg}{h}i~jk          wxyz{|}~5<@%(*%)*%)+&)+&*+&*,'*,'+,'+-(+-(,-(,.        "#!" !   £äĥĦŦƧǨȩɪʪ˫ˬ̬ͭήD8)?3&аѰұӲӲԳճմֵ׵ضضٷ ܹܺݺ޻޼߼߽xp]TL=RJ2B;0?9.=7,%""vtbxvcywdzxf|zg( ~|i~jk         *;xyz{|}~$')$()%()%(*%)*%)*&)+&)+&*+'*,'+,'+-(+-         "# "!   |¿£äĥĦŦƧǨȩɪʪ˫ˬ̬A5'H;,PA0K=-D7)ѰұӲӲԳմִֵ׵ضطٷڸ۸۹ܺݺݻ޻޼߼xq]UL>SK2C<0@:.=7,;5+&"#wucxwdzxe$*"0'}j~kl         xyz{|}~#'($'($')$')$()%(*%(*%)*&)+&)+&*+'*,'*,          !# "! 
q¿£äåĦŦƧǨȩɩʪ˫ˬ̬   H:+PA0J<-ұӲӲԳմִֵ׵ضطٷڸ۸۹ܺݺݻ޻߼߼ɧVM>SK8-<6+ '#$  wucxvdzxejhW  o~jkm         xyz|}~#&'#&(#&($'($')$')$()%(*%(*%)*&)+&)+&*+         "$!# "Xit¿¤åĦŦƧǨȩɩʪ˫ˬ̬ͭ    O@/ӲӲԳմִֵ׵ضطٷڸ۹۹ܺݺݻ޻߼߼VM>TL=RJ;PH:NF8KD7 G@3D=1B;/?9.<6, ($%!"xvcywezyf   ~jklm        wxyz{|}~18<"%'"%'#&'#&'#&(#'($'($')$()%()%(*%(*         Xhs¿ÿ¤åĥŦƧǨȩɩʪ˫ˬ̬ͭή    ӲԳմִֵ׵ضٷٷڸ۹ܹܺݺݻ޻߼߼/A[.?Y->WWN?UL=SJ2B<0@9.=7,   "ywdzxe{yf  }jklm      uvwxyz{|}~18;"%&"%&"%&"%'#&'#&'#&(#'($'($')$')$()     Zku¿¤åĥŦƧǨȩɩʪʫ˫̬ͭήϮ    Գմִֵ׵ضٷٷڸ۹ܹܺݺ޻޻߼߽4Ge2Fb1D_0B].@Z-?XWN?UM>SK8-#  zxezxe{yf|zg~|h}i~jlmn    stuvwyz{|}~4+ ;0#5+ !$%!$%!$%!$&"%&"%&"%'"&'#&'#&(#&(#'($')   ¿¤äH:+A5'аǨȨɩɪʫ˫̬ͭήϮϯ  ӲӲԳմִֵ׵ضٷٷڸ۹ܹܺݺ޻޻߼;Rs9Op8Ml6Ki4If3Gc2Ea0C^/A\.?YO@0H;,?3&XO@VM>TK=RI;OH:ME8KC6 F?3D=1A;/?8-'#$!! ywe{yf|zg}{h}i~jklmo qrstuvwxyz{}~  0'8-! #$ #$!#%!$%  "%&"%&"%&"%'#&'#&'#&(   ¿7-!<0$B6(H;,H;,ǨȨɩɪʫ˫̬ͭέϮϯаѰұӲӲԳմִֵ׵ضٷٷڸ۹ܹܺݺ޻޼߼9Oo;Qs2B;0?9.=7,($%!"zxe|zg}{h~|i}jklmnoprstuvwxyz{|}~    "$ #$ #$ #$   !$%!$&"%&"%&"%'"&'¿£    H:+Ȩȩɪʫ˫̬ͭέήϯаѰұӲӲԳմִֵ׵ضٷٷڸ۹ܹܺݺ3Fc4He5Jh7Lk9Nn:Qr2C<0@:.=7,)%&"# {yf|{g~|h}i~klmnopqrstuwxyz{|}~   .58"#"#"# #$    !$%!$%¿£ä     ȩɪʪ˫̬ͭͭήϯЯѰұҲӲԳմִֵ׵ضٷٷڸ۹ܹ,=V-?X.@Z/B]1D_2Fb4He5Jh7Lk9Nn:Pq.WO?UM>SK8-'#$  {yf|zg}{h}i~jklnopqrstu3*.%xyz|}~   -47!"!#"#"#   ¾ÿ¤å    ɪʪ˫̬ͭͭήϯЯѰұұӲԳճִֵ׵ضٷٷڸ۹ܹ%->W.@Z/B\1C_2Eb4Ge5Ig6Kj8Nm:PqRJ;PH:KD6IA5F?3D=1A;/?9-($%!!|zg}{h~|i~jklmnoqrst *"0'6, yz{|}~ !!"!"!"!"Ϯ=1$¿¤åĥ   )"ɩʪ˫̬̬ͭήϯЯѰѱұӲԳճմֵ׵ضٷٷڸ۹ܹܺ& /A[0C^2Ea3Gd$    OA1G:,4Hf3Fc1D`      YP@WN? RJ2 @9.0,$($%"}{g~|h}j~klmnopqrst   yz{|}~ ! !7-!<1$D7)E8*¿¤äĥŦ Ǩȩɩʪ˫ˬ̬ͭήϯЯѰѱұӲԳճմֵ׵ضطٷڸ۹ܹܺݺ(  1D`3Fc$  H;,PB1G;,@5(4Ge2Eb(      YP@WN?  QI;  JC6HA4E>2 @:.>7-)%&"  |zg}{h}i~jklnopqrstu   y{|}~   A5'¿£äĥŦƧǨȨɩɪʫ˫̬ͭήϮЯаѱұӲԳճմֵ׵ضطٷڸ۹ܹܺݺ޻*     G:,PB1H;-6Kj4If3Fc#!ZQAXO?  QI;"#KC6HA4F?3C=1 >8-<6+*&'#$ !}{h~|i~jkl2)( oqrstu    z{|}~    ¿£äĥĦŧǧǨȩɪʫ˫̬ͭήϮϯаѰұӲԳԳմֵ׵ضطٷڸ۹ܹܺݺ޻޼ ,   "7Lk9Nn9Oo7Ll5Jh4He:RuұZQAXO@  RJ;PH:%$IB5G@3D=1 ?9.<6, ($%!"~|h}j~k$(!-%opqrsuv xyz{|}~!    ¿¤åĦŦƧǨȩɪʪ˫̬ͭέήϯаѰұӲԲԳմִ׵ضضٷڸ۹ܹܺݺ޻޼߼!.   !"$%8Nn6Kj5Ig3GeЂzeYP@WN? 
SJ2 @9.=7,:4* &"" }i~jk   pqrstuvwxy{|}~D7)=1%7-!    ¿¤åĥŦƧǨȩɩʪ˫̬ͭͭήϯЯѰұӲӲԳմִ׵׶ضٷڸ۹۹ܺݺ޻޼߼#0   !"$%%#4HeִYPAWN?UM>SK8-;5+*%&##  ~|i~jkl   qrstuvwxyz{|} 3*9."@4&¿¤äĥŦƧǨȩɩʪ˫ˬ̬ͭήϯЯѰұҲӲԳմִ׵׶ضٷڸ۸۹ܺݺ޻޼߼$2  !"$  ZQAXO@VM>TK=RI;F?3D=1A;/>8-<6+93)*&'# !}j~kl   pqrstvwxyz{|}~    ¿£äĥŦƧǨȨɩɪʫ˫̬ͭήϮЯѰѱұӲԳճִֵ׶ضٷڸ۸۹F9*@4'޻޻߼&5!       ZQAXO@VN>TL=RJ;G@3D=1;6,?9.<6,:4*'#  "~jkl opqrstuvwxy{|}~    ¿¤åĦŦƧǨȩɪʫ˫̬ͭήϮЯаѱұӲԳճմֵ׵ضٷH;,I<,QB1TD2L>.E9*޻߼'7#!         [RBYP@WN?UL=SJ<   G@4E>2B<0@9.=7,;5*82()%  }jklmnoqrstuvwxyz{|}~2)    ¿¤åĥϯƧǨȩɪʪ˫̬ͭέήϯаѰұӲԳԳմֵ׵ضٷ  H;+PA0TD3OA1߼&4$#!       [RBYPAWO?UM>SK8-;5+83)*&'# ~klmnopqrstvwxyz{|}~/&4*:/#<0$  ¿£*;S)9P(7NƧǨȩɩʪ˫̬̬ͭήϯЯѰұӲӲԳմִ׵ضطٷ"   SE3߼$2"$""      wbZQAXO@VM>TK=RI;    F?3D=1A;/?8- 93)61'+&'#!klmopqrstuvwxyz|}~    ¤0C^/A[-?X,=V+;S)9QǨȨɩɪʫ˫̬ͭήϯЯѰұұӲԳմִ׵׶ضٷڸ'     K>/#0!"#        xcZQAXO@VN>TL=RJ;    G@3D>2B;0?9. :4*72(+'($"lmnopqstuvwxyz{|}~v   3Fd1C_2Fb4He2Fb1C_/A\.?Y,=V+;TǨȩɪʪ˫̬ͭήϮЯA5'ѱұӲԳճִֵ׶ضٷڸ۸&    !/ !    !     ͬ[RBYP@WN?UL=SJ<    G@4E>2C<0@:.=7,;5*82(50&'#)%  lmnopqrstuvxyz{|}~   (7N)9Q*;S,=V.?Y/A\1D_3Fc3Fc1D`/B\.@Z,>Wȩɩʪ˫̬ͭаG:+M?/M?/F9*ұӲԳճմֵ׵ضٷڷ۸۹"   -     !"#"!   ,?[RBYPAWO?UM>SK<    HA4F?2C<1A:/>8- 93)60'*&  !lmnpqrstuvwxyz.%3)9."   %4I'6L(8N)9Q+;S,=V.?Y/B]1D`3Fc1D`  ɩʪ˫ˬ̬  C7(J<-PA0ӲԲԳմֵ׵ضٷٷڸ۹ܺ ޻߼+   " !  
Ħ\SBZQAXO@VM>TK=    F>2F?3D=1A;/?8- 94)71'4.%+&($ !mnopqrtuvwxyz       (8N*:Q+2B;0?9.=7,:4*72(4/&,'($ "lnopqrstuvwxz{        +2C<0@:.=7,;5+82(50&2-$,( )%&"# mnoqrs*"uvwxyz{   "0        3Hf˫̬̬ͭή$    ճմֵ׵ضٷڸ۸۹ܺݺ޻߼߼'ݻVF4PA0J=-ׁyd[RBYPAWO?UM>SK<HA4 C=1A:/>8-;5+93) 3.%,''# nop )!/&tuwxyz{|  w%         ʫ˫̬ͭήϯ    մֵ׵ضٷٷڸ۹ܺݺ޻޼߼N@/TD3[J7cP;[J7TD3N?/ׁzd\SBZQAXO@VM>TK=JC7 IA5D=1A;/?9-<6+94* 4/%1,# ($%!!onpq   uvwxyz|}~&          ˫̬ͭήϮЯ$ұӲԳմִ׵׶ضٷڸ۹ܹݺݻ޻߼ UE3\K7`N:YH6RC2ɨ\SCZQAXP@VN?TL=RJ<   IB5G@3 B;0?9.=7,:4* 5/&2-$,' )$%""nopq   vwxyz{|}~           ̬ͭͭήϯаѰұӲԳճִֵ׶ضٷڸ۹ܹܺݻ޻߼߽  "%^M9WG4bQ=]SC[RB$WN?UL=SK7,;5+82)50&2-$/+",( )%&"#  opq    vwxz{|}~έ          ̬ͭήϯЯѰұӲԲԳմֵ׵ضٷڷ۸۹ܺݺ޻߼߼  "  O@0]TC[RB UM>SK8-<6+93) 3.%0+#-)!*&'#$ !opr  uvwxyz{|};0$A5'F9*?3&         ͭήϮЯѰѱұӲԳմִ׵ضطٷڸ۹ܺݺ޻޼߼M?/.$      ^TD\SB  VM>TK=RJ;PH:  IB5 D=1A;/?9-<6,94*71(4/%1,#.)!+'($%!"opqrstuwxyz{|}~#1D"/B!.@   A5'       ͭήϯаѰұӲԳճִֵ׶ضٷڸ۹ܹܺݻ޻߼XG5_M9XG5QB1K=-+" #    ^UD\SC  VN?TL=RJ2B;0@9.=7,:4*72(5/&2-$/*",' )%%""pqrstuvwxyz|}~(7N)9Q(7N'5K%4I$2F#0D    +       ήϯЯѰұӲԲԳմֵ׵ضٷڸ۸۹ܺݺ޻߼߼ J<,PA0WG4]L8XG5QB1& !    ڃ{e]SC  WN?UL=SK7-;5+82)50'2-$0+",( )% #  pqrsuvwxyz{| ,?!.A#0D$2F&4I'6M)9P(7N&5K*<6,!    )        ϮЯаѱұӲԳմִ׵ضطٷڸ۹ܺݺ޻޼߼  ![J7XG52) ܹ]TC[RB WO?UM>SK8-<6+?8-61'3.%/*"-)!*&'#$ !pqrstuvwxy{  )9*; ,>!.@"0C$1E%3H'6L(8O5+ ;0#A5'<1$    '      ϯаѰұӲԳճִֵ׶ضٷڸ۹ܹݺݻ޻߼    !%^TD\RB PF8VM>TK=RJ;PH:MF8KD6IA5F?3D=1A;/ <6,71(4/& .)!+'($%!"pqstuvwxyz{        -?"/B#1E      |¿     '7NЯѰұӲӲԳմֵ׵ضٷڸ۸۹ܺݺ޻߼߼   ^UD\SCZQA VN?TL=RJ2B;0 =7,5/&2-$/*",' )%&"qrstuvwyz{%               ¿      έήϯаѱұӲԳմִ׵ضطٷڸ۹ܺݺ޻޼߼UE3ZI6RC2L>. !# ^UD]SC[RBKD8WN?UL=SK2C<0@:.>7-  50'2-$0+" *%&## rstuvwxyz{               ¿      ʫ˫̬ͭήϯЯѰұӲԳճմֵ׶ضٷڸ۹ܹܺݻ޻߼  J<-QB1XG5TE3  _UE]TC[RBYPAWO?UM>SK<#OG9ME8KC6HA4F?3C=1A:/ <6+  3.%0+#-)!*&'#$ !qrstuwxyz{|                   Ǩȩɩʪ˫̬ͭήϮЯѰѱұӲԳմֵ׵ضٷٷ۸۹ܺݺ޻߼߼   !  
_VE]TD\RBZQAXO@VM>TK=RJ; MF8KD6IA5F?3D=1A;/ <6,  4/&1,#.)!+'($%!"rstuvwxyz{}             #¿ ĦŦƧǨȩɪʫˬ̬ͭήϯаѰұӲԳճִ׵׶ضٷڸ۹ܹݺ޻޼߼     ^TD\SCZQAXO@VN>TL=RJ; NF8LD7IB5G@3E>2B;0?9.=7,  5/&2-$/*" )%&"#rstvwx}z{|!           ¿£äĥŦƧǨȩɪʪ˫̬ͭήϮЯѰұӲӲԳմֵ׵ضٷUF4H:+B6(ܺݻ޻߼  ݃{e\SC[RBYP@WN?UL=SJ< NG9LE7JC6H@4E>2C<0@:.>7-   2-$0+" *%'## rstu*"/&5+ yz|}#           ¿¤åĦŧǨȨɩʪ˫̬ͭͭήϯаѱұӲԳմִ׵>3&C7)J<-PA0RC2K=-ݺ޻߼߼ݺ " ]TC[RBYPAWN?UM>SK< OG9ME8JC6HA4F?2C=1A:/>8-   3.%0+#-)!*&'#$!!rsuv   z{|}          ¿¤äĥŦƧǨȩɪʫ˫̬ͭήϯЯѰұӲԳճմֵ׶    ݻ޻߼Ǩ   \RBZQAXO@VM>TK=  MF8KC6IA5F?3D=1A;/?9-<6,  4/&1,#.)!+' %!"stu    z|}~           ¿¤åĦƧǨȨɩʪ˫̬ͭέϮЯаѱұӲԳմִ׵ض     ݺ޻߼/&  ܺZQAXO@VN>TL=RJ; NF8LD7IB5G@3E>2B;0?9.=7,  5/&2-$/*",' )%&"#ytuv   {|}           ¿¤äĥŦƧǨȩɪʫ˫I<-?3&ήϯЯѰұӲԳճִֵ׶˫    ޻޼߼OA1}vaYP@WN?UL=SJ< NF9LD7JB5G@4E>2C<0@:.=7,  50'2-$0+"-( *%'## stuv   {|}~          ¿¤ĥŦƧǨȩɩ;0$@4'G:+M?/K=-ϮЯѰѱұӲԳմֵ׵ض,# !޻߼  PB1XI6XH6PB2I=-{t_WN?UM>SK8-  4.%93(0+#-)!*&&"$!tuvwecTyz{|}~           ¿¤åĥŦƧǨȩ    ϯаѰұӲԳճִ׵׶ض!  ޼߼(   OA1VG5YI7QC2J=.E9*XO?VM>TKTL=RJ;PH:NF8KD7 G@3D>2B;0?9.=7,  5/&2-$/*",' )%&"#tuvwxyz{|}~      J<-D7)>2%¤åĥŦƧǨȩɪ%    ٷаѰұӲԳմִ׵ضطٷڸ۹ܺݺ޻߼߼H;,UL=RJ2C<0@:.=7,  50&/*"0+"-( *%'#tuvwyz{|}~      @4'H:+£äĥŦƧǨȩɩʪ"  ЯѰұӲӲԳմֵ׵ضٷڸ۹ܹݺݻ޻߼ N@0WG6QB2vo[SK8-  61' 0+#-)! '# uvwxyz{|}~    ˫¤åĥŦƧǨȩɪʫ    аѰұӲԳմִ׵ضطٷڸ۹ܺݺ޻߼߽#   N@0VG5QC2J=.slYQI;OG9ME8KC6 F?3D=1A;/?8-<6+ 60'3.%1,#.)!+'($%!yuvwyz{|}~6,     £äĥŦƧǨȩɩʪ˫ }ήϮЯѰұӲӲԳմֵ׵ضٷڸ۹ܹݺݻ޻߼A4&  F:,M@0VG5RD3J=.L?/RJ;PH:NF8KD6;6,G@3D=1?9.=7,:4* 4/&2-$/*",' &"#uvwxyz{|}~2)7-!=1%D7)"    ¤åĥŦƧǨȩɪʫˬ̬ͭήϯаѰұӲԳմִ׵ضطٷڸ۹ܺݺ޻߼@5(PH:NF9LD7JB5G@4E>2B<0@9.=7,;5* 50&2-$/+"-(  '## uvwyz{|}~        £äĥŦƧǨȩɩʪ˫̬ͭήϮЯѰұӲԲԳմֵ׵ضٷڸ۹ܹݺݻ޼߼ K=.SE3   /B^0Da/B]ʨ  E9+L?/QC2I<-C7)ŤOG9LE7JC6HA4E>2C<0 >8-;5+93)60'3.%0+# $!vwxyz{|}~   B6)  ¤åĥŦƧǨȩɪʫˬ̬ͭήϯаѰұӲԳմִ׵ضٷٷڸ۹ܺݺ޻߼"   K=.TE4L?/Ȧ  -?Z/B]1Da3Ge5Jj4Ih2Ge1Da/B^-@[3(  E9+K>.RC2K=.D8*>3&ߺߺߺ޺޹mgUME8KC6HA4F?3D=1 ?8-<6+94)71'4/%1,# %!uvwyz{|}~0'5+ <0$    ¿äĥŦƧǨȩɩʪ˫̬ͭήϮЯѰұӲӲԳմֵ׵ضٷڸ۹ܹݺ! 
C8*K>/TE4L?/     -?Z/B]1Da2Ge5Ji4Ji2Ge1Da/B^.@[,>X4*>3&K>.ߺ޺޹޹޹ݹݹݸݸܸܷ۷۷۷۶keSKD6IB5G?3D=1 ?9.=6,:4*72(4/&2-$/*",' &"#vwxyz{|}~        ¿¤äĥŦƧǨȩɪʫˬ̬ͭήϯаѰұӲԳմִ׵ضطߺߺߺߺߺߺ&      +=V-?Z/B^1Da3Ge5Ji5Ji3Ge1Eb/B_.@\,>Y+2 @9.=7,;5*82(50&2-$/+",(  &## vwyz{|2)~        ¿¤åŦƧǨȩɩʪ˫̬ͭήϮЯѰұӲӲԳܸڶڶ۷۷۷۷ܷܷܷܸܸܸݸݸݸ~   G:,PB1޹޹޹޹޹޺޺޺޺ߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺߺ޺޺޺) !  *;S+=V-?Z/B]1Da2Ge4Ji5Jj3Hf1Eb0C_.A\-?Y+=V*;T)9Qڶڶڶڶٶٵٵٵٵصشش״״׳׳ֲֲֳֳղղձԱԱԱJC6H@4E>2C<0@:/>8- 93)60'3.%0+#-)! &"$!vwxy  /&4*~    ¿¤äĥŦƧǨȩɪʫ˫̬ͭήϯаղֲֳֳֳ׳׳״״شششصصصٵٵٵٵٵ'  ?4'G:,OA1F:+>3&۶۷۷۷۷۷۷۷۷۷۷۷۷۷۷۷ܷܷܷܷܷܷܷܷ۷۷۷۷۷۷۷۷۷۷#0!  *;S+=V-?Y.A]0Da2Gd4Ih5Jj3Hf1Ec0C_.A\-?Y+=W*;T)9R(8O׳׳ֲֲֳֳֳղղղձԱԱԱӱӰӰӰүүүѯѮѮЮ|HA4F?3C=1A:/>8-<6+93)61'4.%1,#.)!  %!vwxz   ~   ¿¤åĦŧǨȨɩʪ˫̬ѮѯүүүүҰӰӰӰӰԱԱԱԱԱղղղղղղֲֲֳֳֳֳ8."?4'׳׳״״״״شششششششششششصصصصشششششششششششششششش״$2  (8P):S+2B;0@9.=7, 82(50&2-$/+",(  &## wxy   ¿ܹǦȧȧȧȧɨɨɨʨʩʩʩ˩˩˪˪̪!  =2&F:+F:+άάάάάϬϭϭϭϭϭϭЭЭЮЮЮЮЮЮѮѮѮѮѮѮѯѯѯѯѯѯѯѯѯѯѯѯѯѯѯ$ :0$A6(J=.G;,?4'ѮѮѮѮѮЮЮЮ$ &6M(8P):S+=VϭϭϭϭϭϬάάάάά̫̪̪̪ͫͫͫͫͫ˪˪˩˩ʩʩ6-"<1%C7*ЮɨȧȧȧǦǦǦǦƥƥƥťE>2C<0@:.>7- 83)60'3.%0+#-)! '#$ wxyz    ׵¢¢ãããģĤĤŤŤťƥƥƥƦǦǦǦǦȧȧȧȧɧɨ6,!>3&M?/ʩʩʩ˩˩˩˪˪˪̪̪̪̪̪̫̫ͫͫͫͫͫͫͫͫͫͫͫͬάάάάάάάάάάάάάάάάάάάάάά̫̫̪̪̪̪̪ͬͫͫͫͫͫͫͫͫͫͫ˪˪˩˩˩E8*ʩʩʨʨɨɨɨɨȧȧȧȧǦǦǦǦƦƥƥƥťŤŤĤĤģããã⢢¢C<1A:/>8-<6+93)61'3.%1,#.)!  wxyz ~j~ҰýýþľľĿſſ+" ʩ¢âãããģĤĤĤŤŤťťƥƥƥƥƦǦǦǦǦǦǧȧȧ  &5L(8Pɨɨɨɨɨɨɨʨʨâ  =3&F9+B7)ʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʩʨʨʨʨɨɨɨɨɨɨɧɧȧȧȧȧ <1%D8*F9+>3&8."ƥƥƥƥťťŤŤĤĤĤģããã⢢¢¢ſſĿľľb\LA;/?8-<6,:4*71( 1,$.*",' &"xyz{|}~¼¼½ý   6-!?4'A6(ſ¢¢¢¢¢ãããã      &5L(8P*;T-?Yťƥƥƥƥƥƥƥ" 7-"=3&G;,B7)?4'ǦǦǦǦǦǦǦǦǦǦǦǦǦǦǦǦƦƦƦƦƥƥƥƥƥƥƥƥƥťťťťŤŤŤĤĤĤſ5,!<1%ɨããã㢢¢¢¢ſſĿľľľþýý½¼¼[VG?9.=7,:4*82( 2-$/*",( &"# wxyz|}~8."¼¼½½ýýýþľľľ        $3I&6L(9Q*3&E9+¢¢¢¢¢¢¢ſſĿĿľľľþýýý½¼¼¼ 6-!<1%B7);1%5,!@:.=7,;5+83) 3.%0+#-( *&'#$ xyz{|}~v  $      $3I&6M(9Q+3&E9*>3&7-"ľľľľľþýþýýý½½¼¼¼¼ 5+!;0$C7)ƥ>8-;5+93)61'3.%0,#.)!+&($%!xyz{|~.%  7-"?4'6-!(     #1F%3I'6M)9Q+*;{4/&2-$/*",( )%&"uvvwwwxxxxyyyzzz{{{|          (:+=!.B$2G%4J#1F" .&6-!7-"/'JYaL[cN]fP_hRbkTdmVgpXisZlv\nx ' -$)"]oy\mxZlvYjtXir'7    *<,? 
.A"0D$2H%5K'7O&5L$3I#1F"/C -A,>*<);~~~~}}JE92-$/+"-( *&'#tttuuuuvvvwwwxxxyyyws_"      &7):,?!/C$2H%3I#1E!.B,>*;(9#&(  "%& #%!# !  M\d$L[dN]fP`iRbkTdnVgpXisZlv\ny    ]oz\nxZlvYjtXisVgqUfo  2)8.#1) (9*;,> .A"0D$2G%4J,>X~~~~}}} )"/&5,!0(+#{{{zzzy3.%0+#-)!'$%!rrrrsstp]     0'vvvvww   '7);,?"/D$3Izzz{{{{|||||}}}  .&7-"/&~~ !!""# #$!$%"%&"&'#'("%' "%& #%!# !     L[dN^fP`iRbkTdnVgpXisZlv\ny  ]pz\nx[lvYkuXisWhqUfpTenScl-%+~~~~~}}}}}|||||{{{{zzzzyyyyxxxxwwwwvvv1,#.)!+'($%!opppqqup]zrrrsssstttuuuuvvvvwwwwwxxxxyyyyyzzzzz{{{{{{|||||y(.1  ! !!""# #$!$%"%&#&'#'($()$'("%& #$!# !      M\dN^fP`iRbkTenVgqXisZlv]oy    ^p{\nx[mwYkuXisWhqVfpTenScmRbkQaj}}}}}}}}||||||{{{{{{{zzzz ,$3*0(*#xxxxxwwwwvvvvuuuutttssssrr/*",' )% mnnnnoooopppqqqqrrrrsssst ' /'.&' uuvvvvvvwwwwwxxxxxxxyyyyo+14 "!""# #$!$%"%&#&'$'($()$'("%& #%!# !     M\dO^gP`iRblTenVgqXjt[lv]oy   _r}^p{\ny[mwZkuXjsWhrVfpUeoSdmRbkQajP`iO^gzyyyyyyyyxx0(xxxxwwwwwvvvvvvuuuutttttssssrrrrq(!.&,$' ppooooto],( *%&#kkklllmmmm nnoooopppppqqqqrrrrrssssssttttt   (!1(uuuvvv|h-47 #$!$%"%&#&'$')%(*$'("%& #$!# !   
M\dO^gP`iRblTenVgqXjt[lv]oy_r|atat`r}^p{]oy[mwZkuYjtWhrVgpUeoTdmRclQajP`iO_gN]fM\evvuuuu (!/&1(|tttttssssssrrrrrqqqqqppppoooonn$tmmmmllllkk?;0*&'#$!}i~i~i~ijj "+#kkklllllmmmmnnnnnoooooppppppqqqqqqqqrrrrrrrrIU[07;#&($')%(*#'("%& #$!# !ERZFT\HV^IX`KZbM\eO^gQ`iSclUenWgqYjt[lw]oz_r}atat`r}^q{]oy\mxZlvYjtWirVgqUfoTdmSclQakP`iO_hN^fM]eL[dxrrrrqqqqqqqppppppooooon & ,$,$&mmmllllkkkkjjjj~i~i~i}i}h}h|h|g|g+'($%!~zf~{f{g{g{g|g|g|h}h}h}i}i~i~i~ijjjjjkkkplZ &/'' llmmmmmmmnnnnnnnnooooooO\c4;?$'("%& #%!"!DRZFT\HV^IX`KZbM\eO^gQ`iSclUeoWhqYjt[mw]oy_r|bubu`s}^q{]oy\mxZlvYjtXirVgqUfoTdnSclRbkQ`iP_hN^gM]eM\dL[cKZbnnnnnmmmmmmmllllllkkkkkjjjj~j~i~i~i}i}h"1!/|h|h|g|g{g{g~{f~zf~zf*"' "|ye|xd|xd{xd{wd{wc)%&"{wd{xd{xd|xd|xd|ye}ye}ye}ye ~zf~zf~{f{g{g{g|g|g|h|h}h}h}h}h}i}i~i~iokY "0jjjjjk   $+$lkkkllIT[.48!# !CPXERZFT\HV^JX`KZbM\eO^gQajSclUeoWhrYjt[mw]oz_r|btbu`s~_q|]oz\nx[lvYjtXisWgqUfp[mwmkkkkkkkkjjjjjjj~i $)"2)u}i}h}h}h|h|h|h|g|g{g  ."1$3&6(9&7$4#2!/ -{xd{wd{wczwczwczvc yvbyubyubxubxuaxtawtawt`ws`vs`vs`&#wtaxtaxuaxuayubyub yvbzvbzvczvczwc{wc{wc{wd{xd{xd|xd          -"0%4'8(9{g{g{g{g|g|g|g|g|g|g|h|h|h|h}h}h}h:CH(-0CQXERZFT\HV^JXaKZcM\eO_gQajSclUeoWhqYjt[mw]oz`r}bubu`s~}h}h}h}h}h}h}h|h|h|h|h (!*#${g{g{g{f~{f~{f~zf~zf~zf~zf}zf}ye}ye}ye|ye|ye|xe|xd|xd{xd{xd{wd   + ."0$3%5(8'7%5#2!0 -+{xevs`vs`vr_ur_ur_uq_uq^tq^tq^tp^sp]sp]so]ro]ro\rn\?<1tq^tq^uq^uq_ur_ur_vr_vr_vs`vs`ws`ws`ws`wtawtaxtaxta     * -"1%5(9(9%5#2!/ -{wc{wc{wc{wd{wd{xd{xd{xd|xd|xd|xd|xd|xd|xd|xd|xd-47BOVCQXERZGT\HV^JXaLZcM\eO_gQaj\nx|ye|ye|ye|ye|ye|ye|ye|xe|ye|xd|xd|xd|xd|xd|xd|xd{xd{xd{xd{xd{wd{wd{wc{wc{wczwczwczwczvczvczvczvbyvbyvbyvbyubyubyubxuaxuaxta#)"#wtaws`ws`vs`vs`vs` )+ -"0#2%5'8'8(8sp]sp]so]ro]ro\ro\rn\qn\qn\qm[pm[ "&!nkYnkYnjYmjYmjX30(qm[qm[qn\qn\qn\rn\ %$sp]sp]sp]sp^tp^ + .#1*LJ>Release_v0.3/kernels/compiler_mul24.cl000066400000000000000000000002241223142177000200210ustar00rootroot00000000000000kernel void compiler_mul24(global int *src1, global int *src2, global int *dst) { int i = get_global_id(0); dst[i] = 
mul24(src1[i], src2[i]); } Release_v0.3/kernels/compiler_mul_hi.cl000066400000000000000000000002261223142177000203350ustar00rootroot00000000000000kernel void compiler_mul_hi(global int *src1, global int *src2, global int *dst) { int i = get_global_id(0); dst[i] = mul_hi(src1[i], src2[i]); } Release_v0.3/kernels/compiler_multiple_kernels.cl000066400000000000000000000001131223142177000224310ustar00rootroot00000000000000__kernel void first_kernel(void) { } __kernel void second_kernel(void) { }Release_v0.3/kernels/compiler_nautilus.cl000066400000000000000000000031261223142177000207260ustar00rootroot00000000000000typedef float2 vec2; typedef float3 vec3; typedef float4 vec4; #define sin native_sin #define cos native_cos #define tan native_tan #define normalize fast_normalize #define length fast_length #define mod fmod #define time 1.f inline vec3 reflect(vec3 I, vec3 N) { return I - 2.0f * dot(N, I) * N; } inline uint pack_fp4(float4 u4) { uint u; u = (((uint) u4.x)) | (((uint) u4.y) << 8) | (((uint) u4.z) << 16); return u; } #define OUTPUT do {\ const vec4 final = 255.f * max(min(gl_FragColor, (vec4)(1.f)), (vec4)(0.f)); \ dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); \ } while (0) inline float e(vec3 c) { c=cos((vec3)(cos(c.x+time/6.0f)*c.x-cos(c.y*3.0f+time/5.0f)*c.y, cos(time/4.0f)*c.z/3.0f*c.x-cos(time/7.0f)*c.y, c.x+c.y+c.z+time)); return dot(c*c,(vec3)(1.0f))-1.0f; } __kernel void compiler_nautilus(__global uint *dst, float resx, float resy, int w) { vec2 gl_FragCoord = (vec2)(get_global_id(0), get_global_id(1)); vec2 c=-1.0f+2.0f*gl_FragCoord.xy/(vec2)(resx,resy); vec3 o=(vec3)(c.x,c.y,0.0f),g=(vec3)(c.x,c.y,1.0f)/64.0f,v=(vec3)(0.5f); float m = 0.4f; for(int r=0;r<100;r++) { float h=e(o)-m; if(h<0.0f)break; o+=h*10.0f*g; v+=h*0.02f; } // light (who needs a normal?) 
v+=e(o+0.1f)*(vec3)(0.4f,0.7f,1.0f); // ambient occlusion float a=0.0f; for(int q=0;q<100;q++) { float l = e(o+0.5f*(vec3)(cos(1.1f*(float)(q)),cos(1.6f*(float)(q)),cos(1.4f*(float)(q))))-m; a+=floor(clamp(4.0f*l,0.0f,1.0f)); } v*=a/100.0f; vec4 gl_FragColor=(vec4)(v,1.0f); OUTPUT; } Release_v0.3/kernels/compiler_nautilus_ref.bmp000066400000000000000000006000661223142177000217500ustar00rootroot00000000000000BM66(&%$('&)'&)(&*('*)'+)'-+).+).,)/,)0-*0-*1.*1.*2.+3/+3/+40+50,51,61,61,72-:4.:4/;5/;5/<6/=60=70>70?81?81@91A91C;3D<3E<4F=4F=4G>5H>5H?5KA7LB7MB8MC8NC8OD8RF:SG;VI=VJ=WJ=XK>YK>YL>ZL?[M?\M?\N?`QBaQBaRBbRBcSCdSCdTCeTDfUDgUDgVEhWEiWEjXEjXFkYFo\Hs_Ks_Kt`Ku`L|fP}fPlTmUmUnUnVtZw]x]x]y^z^z^{^{_{_{_z^z^w\y^x]x]u[w]tZsZu\u\w]y_x_x^u\rZqYqYr[w_t\v^w_yaxau^u^v`v_r]r\s^wavau`u`r^n[p]mZp^|kZyhWueUqbSh[MeXKcWJ_THWMBSI@ME=J?5I?5I?4H>4E<2G>4G=4F=3F=3@8/@7/<5-92*92*3-&6/(81*70*63/IC=KD=ICRJCSKCUMDUMDWOFXOFZQHZQHZQHZQH]SJ]SJ]SJ]SJ_UK_UKbWMbWMdYOh]Rh]Rj_Si]Rg[Pi]Rg\Pg\Pg\Pg\Pk`Tl`TnaUnbUnbUnbUnbUnbUnbUobUobUqdWsfYqdWsfYsfYvhZtfYrdWrdWreWtfYtgYreWreWreWseWqcVqcVqcVqcVoaTm`Rm`SobTobTobTm`Sn`Sn`Sl^Qj]Oi\Oh[Og[NfZN_TI\QG[QF)(&*('*)'+)',)'.+).,)/,)/-*0-*1-*1.*2.+3/+3/+40+40,72-83.83.93.94.:4/;5/;5/<6/=60=70>70?80?81@81C;3C;3D<3E<4E=4F=4G>5H>5H?5KA7LB7MB8MC8NC8OD8OD9UIYK>YL>ZL?[M?\M?\N?`QBaQBaRBbRCcSCdSCeTCeUDfUDgVDhVEhWEiWEjXFkXFkYFlYFp\It_Kt`KxcNycNlTlUmUmUnVoVrXrYv[v[w\z^{^{_|_|_|_{_{_}az^z^v\y^x]u[u[w\tZv\u\u[w]y_x_x^rZqZqYpYzat\s\u^w_yaxau^t^v_s]r]t^u`wavau`t`t_p]r_o\r_o]{jYwgWtdUm_QgZMh[NeXL_SHYNDSI@QH?ME=?93;2)E;2G=4F=3C:1@8/@7/:2+92+92*A<6LF?JCOG@OG@UMEVMEVMEXOFXOFXOG[QH]SJ]SJ]SJ]TJ]TJ]TJ^TJ^TJ`VL`VL`VLbXMbXMg\Qi]Rk_Tg\Qg\Qg\Qi^Rg\Qg\Qh\Qj^Rl`Tl`TnbVnbVnbVobVobVobVobVobVobVqdWqdWocVtfYtfYreWtgYtgYtgYreWtgYreWseWseWseWseWqcVqcVqcVqdVm`Sm`Sm`SpbTm`SpbTn`Sl^Ql^Qj]Pj]Pj]Pl_QnaSk^QfYMeYMbVKaVJ+)'+)'.+).,)/,)/-*0-*1-*1.*40,40,51-61-62-72-73.83.93.94.:4/;5/;5/<6/=60=70>70?80?81B:3C;3C;3D<3E<4E=4F=4G>4G>5H?5I?5LB7MB8MC8NC8OD8RF:UIYK>ZL>ZL?[M?\N?]N@`QBaQBbRBbRCcSCdTCeTCeUDfUDgVDhVEhWEiWEj
XFkXFkYFlYGp\IwbM{eO|fP|fPiRmUmUnUnVoVoVsYsYtYtZuZuZv[w[}`}`|`|_|_{_{_}`z^y^v\x]x]u[t[tZsZu\u\t[v]v]x^w^rZqZpYr[t]t\s\w`w_xau^u^t^s]s]t_s^u`wavau`vbvas_t`q^taq_}l[wgWtdUn_Qj\Ni[Nh[NbVJ^SHYNDZPETKAOF=PG?QI@RJAQIAQIAPHARJBPH@PH@PH@PH@RJBULDULDYPG[RI[RI[RIYPG[RI[RI^TJ^TJ\RI\RI^TJ`VL`VL^TJ^TJ`VLaVLaVLcXNeZOeZOg\Qg\Qj^Rh\Qh\Qh\Qj^Rh\Qh\Qj^Sj^Sl`TobVobVobVobVobVobVocVocVocVpcVreWpcVpcVpcVreXreXtgYugYugYugYugYseXseXseXseXqdVqdVqdVqdVobTm`Sm`Sk^Qn`Sn`Sl_Ql_Ql_Qj]Pj]Pl_Ql_QoaSm_Qm_Ri[Nh[NgZNdXL\QF0-+0.+1.+2.+2/,3/,40,40,51-51-62-72-72.83.93.94.:4/;5/;5/<6/=60=70>70?80A:2B:3C;3C;3D<3E<4E=4F=4G>4G>5H?5I?5J@6MB8MC8NC8QF:RF:SG;VI=WJ=WJ=XK>YK>ZL>ZL?[M?\N?_PA`QBaQBbRBbSCcSCdTCeTDfUDfUDgVDhVEiWElZGmZHn[Ho[Ho\Ip]IwbM{eP|fPiRjSjSmUqXqXrXrYv[v[w\w\x\y]y]z]z^{^x\z^}`|_|_{_{_z_z^y^v\v[x]u[t[qXsZsZu\u[t[v]v]x^rZqZqYpYr[t\s\u^w_v_xau^t^t^q[r]t^v`waycxbwbvbuar_r^q^r_sap_ziYqbSk]Oj\Oi\Nh[NgZMdXK^SH[PEZPE^SH[PFZPF^SI[QGZQGZPGZPGYPF[RH]SJ_UK]SI_UK_UK_UK_UK_UK_UKaWL_UK_UK]SI]SIaWLaWLaWLaWL_UKaWLaWLaWLcYNf[PdYNdYNf[Pf[Ph]Qh]Qh]Qh]Qj_Sh]Qk_Sk_Sk_SocVocVocVocVocVocVpcVpcVpcVpcVpcVreXpcVpcVpcVpcVpcVseXugYugYugYseXsfXqdVsfXvhYqdVrdVrdVpbUn`Sl^Ql_Qj]Pl_Ql_Ql_Ql_Qj]Pj]Pm_Rm_Rm_Rm_Rm_Rk]Pi\NgZMgZMdXK_SG\QE2/+2/,3/,40,40,51-61-62-72-73.83.93.94.:4/;5/;5/<6/=60=70>70?80A:2B:3C;3C;3D<3E<4E=4F=4G>5H>5H?5I?5J@6J@6MC8PE:QF:RF:SG;VI=WJ=WJ=XK>YK>ZL>ZM?[M?_PA_PA`QBaRBbRBcSCcSCdTCeTDfUDiXFjXFkYGlYGo\Ip]Jq^Jr^JvaMwbMwbM~hRkTlTlUmUnUqXqXrXsYv[w\w\x\x]y]z]z^{^{^|^{^x\x\z^|`|_{_{_{_}`z^y^v\v[x]u[t[qXsZu\u\t[t[v]u\u\rZqZpYpYoYt\s\u^w_v_u_u^t^q\s]r]q\xbycxcxbwbvbuar_q^q^r_q_p_n\tdUn_Qh[MgZMfYLh[Ni\OdWKcWK]RG_TI`UJ`UJ_TJ_TIbWLbWLdYNcXNaVLaVL`VL`VL`VL`VKbXMbWMbWM`UK`UK^TJ^TI`UKbWMbWMbWMbWMbWMbWMdYNbWMdYNdYNdYNdYNf[Pf[Pg[Pg[Pi]Qi]Qk_Sk_Sk_Sk_Sk_SpcVpcVpcVpcVpcVpcVpcVpcVpcVpcVpcVpcVqcVqcVqdVobUobUobUqdVsfXsfXqdVobUrdVrdVrdVtfXtfXpbUl_Rl_Rj]Pj]Pj]Pl_Rj]Pl_Rj]Pj]Pm_Rm_Rm_Rm_Rm_Rk^Pi\Oi\OgZMeXKaTH_SF^RFYNC40,40,51-61-62-72-83.83.93.94.:4/;5/;5/<6/=60=70>70A92A:2B:3C;3C;3D<3E<4E=4F=4G>5H>5H?5I?5J@6MB8MC8QE:QF:RF;SG;SG;WJ=WJ=
XK>YL>ZL>[M?^OA_PA`PA`QBaRBbRBcSCiXGjXGkYGlZHlZHm[Hn[Io\Ip]Iq]Jq^JuaLvaMwbMxcM|fPkTlTmUmUnVnVrXrYsYtYw\x\x\y]y]z]{^{^|^|_|_|_{^{^x\w\z^|_|_{_{_z_z^y^y^v[u[w]t[qXqXpXu\t[t[s[u]u\t\oXqYpYpYt]s\s\u^v_v_u_t^q\q\r]t_s^u`ycxcwbwbvbuar_q^q^p^q_p_p^tdUqbSk]Oj\Oi\NfYLj\OdXLaUJ^SH`TI_TIaVJ`UJ`UJaVKaVKaVK`VKbWLdYNdYNdYNcXNcXNaVLaVLaVLaVLcXMaVLcXMcXMcXMaVKcXMaVKcXMeZOeZOeZOeZOeZOeZNg\PeZNg\Pg\Pg\Pi^RnaUl_Sl_Sl`Sl`SpcWpcWpcWpcWpcWpcWpdWqdWqdWqdWqdWobUobUobUm`Sm`SobUobUqdWrdWrdWrdWpbUrdWrdWpbUpbUnaSl_Rj]Pj]Pj]Pj]Pj]Pl_Rj]Pm_Rk]Pm_Rm_Rm`Rk^Pm`Rm`Ri\Oi\OgZMgZM_SG_SG_SG_SG[OCYMB61-62-72-83.83.94.:4.:4/;5/;5/<6/=60=70@92A92A:2B:3C;3D;3D<3E<4F=4F=4G>5H>5H?5KA7LB7MB8NC8QE:QF:RF;SG;TG;WJ=XK>XK>YL>ZL>]OAaRCbRCbSCfVFgVFhWFhWGiXGjYGkYHlZHmZHm[In\Io\Ip]Jt`Lu`LuaLvbMwbMxcN|fPiRlUmUmUnVoVoVsYsYtYw\x\x]y]z]z^{^{^|_}_}_}_|_|_{^{^x\w\w\y]|_{_{_z_z^z^y^v\u[u[t[qYqXpXpXu\t[t[s[s[u\qZnXpYmWoYt\s\r\w_v_s]r\t^q\u_u_t_s^wbvaxcwbvbvbs_r_q^q^p^q_q_n]{kZsdUm_Qm_Pl^PfYLeYLeXLdXKfYM`UIbVKfZNeZNcWLbWLbWLbVKaVKcXMaVK`VK`UK`UK`UKbWLbWLbWLbWL_UJaWLaWLcXMcXMaVLcXMeZOeZOeZOeZOeZOfZOfZOh\PfZOh\Ph\Ph\Pj^RnbUl`Tl`Tl`TnbUqdWqdWqdWqdWqdWqdWqdWobUobUobUobUobUobUobUm`Sm`Sm`SpbUpbUrdWpbUpcUpcUreWreWl_Rj]Pj]Ph[Oj]Pj]Pj]Ph[Oh[Ok]Pk^Pk^Pm`Rm`Rm`Rk^Pk^Pn`Rl^Pi\OgZMgZM_SG_SG_SG[OCYMBYMBYMBYMB83.83.94.:4/:5/;5/<5/<60=60@92@92A:2B:2B;3C;3D;3D<4E<4F=4F=4G>5J@7KA7KA7LB7MB8NC8NC8QF:RF;SG;VJ=WJ=ZM?[M@^PB_QBcSDdTEdUEeUEfVFgVFhWFiXGiXGjYGkYHlZHm[Hn[In\Ir_Ks_Lt`LuaLvaMwbMwbM{eP|fPiRlUmUnUnVoVpWsYtYtZx\x\y]y]z^{^{^|_|_}_~`}`}_}_|_|_|_{^x\x\w\z^y]{_{_{_z_|`y^y^v[u[u[t[qXqXpXrZt[t[s[s[rZt\qZnXnWmWoYs\s\u^v_v_s]r\r\v`u_t_t^s^wbvauau`t`vas_r_q^n\p^saq_p^wgWvfWn_Qj]Oj\Oi\Nh[NeYLeXLfZMcWKeYMeYMbVKaVJ_TI^SH`UJ`UJ`UJ_TJcXMcXMcXMcXMcXMcXLbWLdYNbWLdYNdYNf[Of[Of[Of[Of[Of[Of[Of[Of[Oh]Qh]Qf[Oh\Qh\Qh\Qh\Qk^Rh\Qk^Rk^RobUobUobUobUobUm`TobUobUobUobUobUobUpbUpbUpcUnaTnaTnaTpcUpcUpcUpcUnaTnaTnaTnaTj]Pj]Ph[Oh[Oh[Oj]Pk^Pk^Pk^Pi\OgZMk^Qm`Rm`Rn`Rk^Qn`Rl^Qj\OhZMeYL_SG_SG]QE[ODYMBYMBYMBYMBYNBYNB:4/;5/;5/<6/=60=70@92@92A:2B:3C;3C;3D<3E<4E=4H?6I?6J@6J@7KA7LA7LB
7MB8PE:QF:RF:UI5G>5H?6H?6I@6J@6MC8MC9ND9OD9PE:PE:THXK>[N@\NA\OA]OAaRCaSDbSDcTDdTEeUEfVEfVFgWFhWFiXGjXGkYGn\Jo]Jp]Jq^Kr^Ks_Kt`Lt`LuaLvaMwbM{eO|fP}fPjSjSkSnVoVoVpWqWqXuZuZy]y]z^{^{^|_}_}_~`~```~`~`~`}`}_}_|_|_{_x\x\w\w\w[y]y]x]z_z_y^y^y^x^u[t[oWqXpXpXoXrYqYs[s[rZrZoXqZkU}hSlWqZpZr\t]v_s]u^r\s^u_t_t_s^s^vavauat`t`s`r_r_o]~l[n\~m\~l[zjYteUqbSpbSpaSqcTn`Rk^Pk]PeYLeXLgZMfZMh[Oh[OeYMdYMbVKbVKaVJaVJaUJ`UJbWLbWLf[Oh\Pk^Rj^Rj^Rj^Rh\Ph\Ph\Ph\Ph\Pg\Pg\Pg\PeZNg\PeZNeZNg[Pi]Qi]Qi]Ql_Si]Qi]Ql_SnaTnaTnaTnaTnaTnaTnaTnaTnaTnaTl_SnaTnaTpcVnaTnaTl_Rl_Rl_Rl_Rl_Rj^Qj^Qj^Qm`Rk^Qk^Qi\Oi\Oi\Oi\Oi\Oi\Ok^Qk^Qk^Ql^Qn`Sl^Qj\Oj]Ol_Ql_QfYLdWJaUI]QEYMBYMBYNBYNBYNBYNBYNBYNBWLAWLAXLAXLAC;4C;4D<4E<4E=5F=5G>5G>5JA7KA8LB8OD:RGWK>XK>YL?\NA]OA]OAaRDbSDcSDcTEdUEeUEfVFgVFhWFhWGlZIm[In\Io\Jp]Jp]Jq^Kr_Ks_Lt`Lu`LvaMvbMzeO{eP|fP}gPjSkSkSnVoVpWpWqWrXrXv[y]z]z^{^|^|_}_~`~``aaa``~`~`}`}_}_|_|_|_x\x\x\w\w[v[y]x]x]z_z^v\y^x^u[u[t[qYqXpXpXrZqYqYs[r[rZoXnXnW}hSjUlWqZpZoZt]s]u_r\t^s^u_t_t_s^u`vavauat`vbuap]}kY|kY~l[}lZ~m\~m[xhXueUqbSsdUrdUrcTo`Rl^Pi\Nh[Nj]PgZNgZNi\Oj]Qj]Pg[OeYMdXMdXLdXLfZNfZNeYNg[Og[Og[Ok_Ri]Qk_Ri]Ph\Ph\Ph\Ph\Ph\PfZNfZNfZNfZNfZNh\Ph\Pj^Qj^Qj^Ql`Sj^Qj^Ql`Sl`Sl`Sl`SnaUnaUnaUnaTnaTnaTl`Sl`SoaTl`Sj^Qj^Qm`Sm`Sj^Qj^Qm`Sh\Ok^Qk^Qk^Qk^Qi\Oi\Oi\Oi\Oi\Oi\Oi\Oi\Oi\Oi\Ol^Ql^Qn`Sl_Ql_Qj]Oj]OaUIdWJbUI]QFYNBYNBYNBYNBYNBYNBYNBZNBZNBXLAXLAXLAXLAXLAI@8LC:MC:MD:ND;OE;PE;PF;QFWJ>WK>XK>YL?\OA]OA^PBaRDbSDcTDdTEdUEeUEiXGjYHkYHkZIl[Im[In\Jo\Jp]Jq^Kr^Kr_Ks_Lt`LuaLvaMzdO{eO|fP}fP}gP~hQkSkTlToVpWqWqWrXsXv[y]z^{^{^|_}_}_~```accaa``~`~`~`}`}_|_|_|_{_x\x\w\w\v[v[x]x]w]z_y^y^x^x^u[t[qYqXpXpXoXqYqYpYs[rZqZnXnX}hS}hSjUlWpZpZoZv_s]p[oZt^s^r]t_s_s^t`vavauat`t`s`n\}kY|kY~l[}lZ|kZ~m\zjYueVteUsdUsdUm_Qm_Ql^Pk^Pm`Rm_Rj]Pi\Oi\Ok^Qm`Rl_Rl_Rg[Og[NgZNfZNfZNh\Pj^Qj^Qj]Qj]Qg[OeYMeYMeYMi]Qg[Og[Og[Of[Of[OfZOh\Ph\Ph\Pk^Rk^Rj^Rm`Sj^Rh\Pj^Rm`Sm`Sm`Sm`Sm`SobUm`Sm`Sj^Qk^Qk^Qm`Sm`Sk^Qh\Pi\Pi\Pk^Qi\Pk^Qi\Pk^Qk^Qi\Pi\OgZNgZNi\Pi\Oi\Pi\Pi\Pl^Qj]Pg[Nl_Ql_Ql_Ql_Qj]Ph[NdWK_SG_SG[PDYNBYNBYNBYNBYNBYNBZNBZNBZNCXLAXLAXLAXLAXMAXMAXMAND:OE;OE;PF;QFWJ>XK>XL?YL?ZM?]O
A^PBbSDeUFfVFgWGhWGhXGiXHjYHkZHlZIm[In[In\Jo]Jp]Jq^Kr^Ks_Kt`Lt`LuaLydOzeO{eP|fP}gP~gQhQkSlTlTpWpWqWrXrXsXv[w[z^{^|^|_}_~`~``aaddcaaaa`~`~`~`}`}_}_|_|_{_x\x\w\w\w[v[x]x]x]w]y^v\y^x^x]t[qYnVpXpXoXoXqYqYpYpYrZoXnXkV}hSjUlWlVpZoZq\s]p[p[oZq\p\p[o[q]p\r^vauauat`t`q^n\}kZ|kY{jY}lZm\~m\xhXueVrcTqbSn`Qn_Qm_Ql_Qn`Rn`Rm_Rj]Pj]Pi]Pi\Oi\Om`Sm_Rj]Pg[Og[Oi]Pk^Rk^Rk^Rk^Qj^QfZNfZNeZNeYNcWLcWLcWLeYMeYMeYMg[Og[Og[Og[Oi]Pi]Pi]Pk^Rg[Og[NgZNk^Rk^Rk^Rk^Rk^Rk^Rm`Sm`Sk^Rk^Rk^Rk^Ri\Pi\Pi\Pi\Pi\Pi\PgZNi\Pi\Pk^Qi\Pk^Qg[Ng[Ng[Ng[Nj]Pj]Pj]Pl_Qj]Ph[Nh[Nl_Qh[Nj]Ph[Nh[NdWKbUIbUIWLAWLAYNBYNBYNCYNCZNCZNCZNCZNCXLAXLAXMAXMAXMAXMAYMAYMAVK@QFWK?WK?XL?YL@ZM@[M@[NA\OA]OA^PBaRDbSDeVFfVFgWGhWGiXGjYHjYHkZHlZIm[In\Io\Jp]Jp^Kq^Kr_Ks_Lt`LuaLydOzdO{eO{fP|fP}gP~gQhQkSlTmTmUqWqWrXrXsYw[w[{^{^|_}_}_~``aacdddddaaaa`~`~`~`}`}`}_|_|_{_x\x\w\w\w\v[v[x]x]w]w\y^v\x^x^w]qYqYnVpXpXoXqYqYpYpYoXqZnXkV}hS}hS|gSlWpZpZr\q[s]p[o[q\q\p\p[o[n[r^t`vauauat`s`n\n\}kZyhW{jYn\|kZ{kZsdTscTrcTo`Rn`Qn`Qm_QoaSo`Rn`Rn`Rk]Pj]Pj]Pj]Pi\Oi\Ok^Qh\Oj]Ql_Rl_Rl_Rk_Rk_Rk^RfZNfZNdXLdXLdXLcXLcXLcXLcXLeYNeYMg[Og[Og[Oi]Qi]Qi]Qi]Qg[Og[Og[Ok_Rk_Rk_Rk_Rk_Rk_Rk_Rg[Ni]Pi]Pk_Rk_Ri]Pi]Pi]Pi]Pi]Pg[Ng[Ng[NeYMi]Pi]Pj]Pg[Ng[Ng[Nh[Nh[Nj]Pj]Pl_Rh[NfYMfYMfYMh[Nh[Nh[NfYMbVI`TH[PDYNCWLAWLAYNCZNCZNCZNCZNCZNCZNCXMAXMAXMAXMAXMAYMAVK@WK@WK@WK@VJ>WK?WK?XL?YL@ZM@ZM@[N@\NA]OA]OA^PB_QBbSDfVFgWGhWGhXGiXHjYHkZHlZIm[Im[In\Jo]Jp]Jq^Kr^Ks_Ks`LwcNxcNydOzeO{eO|fP}fP~gQ~hQkSlTlTmTmUnUqXrXsXv[w[w\x\|^|_}_~`~``aaddddddddaaaaa`~`~`}`}`}_|_|_|_x]x\x\w\w\v[v[v[x]w]w\v\y^x^x^w]t[qYqYnVpXoXoXqYpYpYoYoXoXlVkV}hS|hS{gS~iUpZr\q\q[p[p[oZq\p\p\o[q]q]p\r^vauauat`q^n\n\m[yhWyhWxgWziYwgVsdTpaRpaRoaRo`Rn`QpaSoaSoaSn`Rn`Rk^Pk]Pj]Pj]Pj]Pi\Oi\Om`Sm`Sj^Qj]Ql_Rl_Rj]Pi]PeYMeYMdXMdXLdXLdXLdXLbVJfZNfZNfZNfZNfZNeZNeYMeYMh[OeYMeYMg[Oj]Qj]Qj]Qj]Pj]Pi]Pi]Pg[Og[Og[Ol_Ri]Pj]Pj]Pj]Pj]Pg[Ng[Ng[NeYMcWKeYMj]Pj]Ph[Nh[Nh[Nh[Nh[NfYMj]Ph[NfYMfYMdWKdWKdXKi[NfZMbVI^RF^RFYNCYNCZNCZNCWLAZNCZNCZNCZOCZOCXMAXMAXMAXMAYMAYMAWK@WK@WK@WK@WK@WL@YL?YM@ZM@[N@\NA\OA]OA^PB_PB`QBcTDdTEgWGhXGiXGjYHkYHkZHl[Im[In\Jo\Jp]Jp^Kq^Kr_Ks_LwbNxcNydO
zdOzeO{eP|fP}gP~gQhQkSlTlTmUnUnUrXrXsXv[w[x\x\y\}_}_~``aaaddeedddddaaaaaa`~`~`~`}`}`|_|_|_x]x\x\w\w\v\v[v[u[w]w]w\v\sZx^w^w]qYqYnVmVoXoXnWqYpYpYoXoX~iT~iT}hS|hSjU{gS}iTr\r\q\nYp[o[q\q\p\r]r]q]p]p\r^s_s_r_r_m[n\n\ziXyhXvfUxhWziYrbSqbSqaRpaRoaRo`RqbSpbSpaSoaSoaSl^Ql^Pk^Pk]Pj]Pj]Pl_QnaSk^Qm`Sk^Qk^Qj^Qh[OeYMcWKcWKcWKcWKeYMeYMdXMdXLdXLdXLdXLdXLdXLfZNdXLaVJdXLaVJaVJcXLcXLh[Oh[Oh[Oh[Oh[OeYMh[Oh[Oh[Oj]Qh[Oj]Pj]Pj]Pj]Ph[Oh[OfYMfYMcWKfYMfYMfYMfYMfYMfYMfYMfYMh[Oh[OfYMfZMbVIbVJdXKgZMdXKbVJ^RF^RFYNCZNCZNCZNCZNCZNCXMAZOCZOCZOCXMAXMAXMAYMAYMAWK@WK@WK@WK@WK@WL@WL@WL@XL@\NA\OA]OA^PB_PB_QB`QBaRCdUEhWGiXGiYHjYHkZHlZIm[In[In\Jo]Jp]Jq^Kr^KvaMwbNwcNxcNydOzeO{eO|fP}fP~gQ~hQhQlTlTmTmUnUoUoVuZv[w[w\x\y\y]z]~`~``aaaddeeeeeddddbaaaaaa~`~`~`}`}`}_|_|_y]x\x\w\w\w\v[v[u[x]w]w\v\v\sZx^w]t[qYpYmVmVoXoXqYpYpYoYoXnX~iTzfR}hS|hS~jU~iUkVoZoZnZnYp[o[q\s^r^r]q]q]p]p\r^s_s_tar_o\n\}lZxgVwfVvfVscSwgWo`Qo`QqbSpaRpaRrcTqbTpbSpbSpaSoaSl_Ql^Ql^Qk^Pm`RoaToaSoaSnaSi\Pk^Qk^QfZMfZMfZMcWKcWKcWKcWKcWK^SH`UIbWKbWKbWKbVKbVKbVKbVKbVKdXLbVJbVJdXLdXLfZNfZNfZNfZNdXLdXLdXLdXLdXLfZMfZMfZMfZMh[OfZMfYMaVJdXKdXKaVJaVJdXKdXKfZMfZMfZMfZMh\Oi\OfZMbVJbVJbVJbVJdXKbVJ^RF^RF\PE\PEZNCZNCZNCZNCZOCZOCZOCXMAZOCXMAXMAYMBYMBVK@WK@WK@WK@WK@WL@WL@WL@XL@XL@XL@VJ?_PB_QB`QBaRCbRCbSChXGiXHjYHkZHlZIm[Im[In\Jo\Jp]Jq^KtaMuaMvbNwbNxcNydOzdO{eO{fP|fP}gP~gQhQkSlTlTmUnUnUoVrXv[v[w[x\x\y\z]z]{^`aaabdeeeeeeeedddbbaaaaaa~`~`~`}`}`}`|_|_y]x]x\w\w\w\v\v[u[u[w]w]v\v\sZrZw]w]t[qYpXmVmVoXnWpYpYoYoYoX~iT}iTzfRyeQ~jU~iUkWkVoZoZ}iVnYp[q]s^s^r^r]q]q]p]r^q^uauar_r_o\~lZ{iXxgVtdTtdTscTscSo`Qo`QqbSpaRrcTrcTqbTqbSpbSpaSoaSm_Ql_Ql^Qn`RpbTpbToaToaToaSj]PeYLgZNgZNgZNdXLdXLdXLaVJ_SH_SHcWKcWKcWKcWKcWK`UIbWKbWKbWKdXLbVKbVKdXLdXLdXLfZNfZNfZNdXLdXLdXLdXLdXLdXLdXLfZMfZMfZMfZMbVJbVJdXLdXL`TH`THdXLdXLdXLfZMfZMi\Oi\OdXLbVJbVJbVJbVJbVJ^RF^RF^RF\PEZNCZNCZOCZOCZOCZOCZOCZOCZOC[OCXMBYMBYMBYMBWK@WK@WK@WL@WL@WL@WL@WL@XL@XL@XL@VJ?VJ?TH=bRCbSCcSDfVFjYHkZHlZIl[Im[In\Jo\Js_Ls`Lt`MuaMvbMwbNxcNxcNydOzeO{eP|fP}gP~gQhQkSlTlTmTmUnUqXrXsXsYw[w\x\y\y]z]{]{^`aabbeeefeeeeeeeeddbbbaaaaa~a~`~`}`}`}`|
_|_y]x]x\w\w\w\v\v[u[u[u[w]v\v\v\rZu[w]t[s[pXpXmVlVnXnWpYpYoYoXlV~iTzfRyeQyeQ{gS}iUkVkVoZ~jV}iUkWo[q\s^s^r^r]q]q]p]t`t`s_uar_o]|jYyhWxgVueTtdTtdTqaRn_PpaQo`Qo`QsdTrcTrcTrcTqbTqbSpbSm_Qm_QoaSoaSqbTpbTpbTpbTobTm_RfYLeYLeYLg[NeXLbVJbVJbVJ_THbVJdXLdXKcWKcWKaUJaUIcWKcWKcWKeYMcWKcWKeYMeYLeYLeXLgZNdXLdXLdXLdXLdXLbVJdXLdXLdXLdXLbVJbVJ`THbVJdXLbVJ`TH`THbVJdXLdXLdXLi\OgZMgZMeXLbVJ`TH`THcVJZNC\PE^RG\PEXMBXMBZOCZOCZOCZOCZOCZOC[OC[OC[OCYMBYMBWK@WK@WK@WL@WL@WL@WL@WL@XL@XL@XL@VJ?SH=TH=TH=VK?VK?gWFkZHlZIl[Ip^Kq^Kr_Lr_Ls`Lt`MuaMvbMwbNwcNxcNydOzdO{eO|fP|fP}gP~hQkSkTlTmTpWqWqWrXrXsYtYw[x\x\y\z]z]{^|^|^aabdeeeffffeeeeeeedbbbbaaaaaa~`~`}`}`}`|_|_y]x]x]x\w\w\v\v[u[u[u[t[v\v\v\u\rYt[qYs[pYpXmVlVlVkUnWpYoYoYlV~iTzfRzfRyeQxeQ~iU}iU|hT|hToZ}iVkWmYo[q\q\p\p\o[o[p]p]r^q^q^n[}kY}kYyhWyhWveUueTtdTtdTqaRk]Nm^Po`QqbSsdUsdTrcTrcTqcTqbTpbSpbSpaSoaSqcUqcTqcTpbTpbTk^Ph\OfYMfYMcWKcWK`UIcVJ`TI`TIbVJdXLdXLdXLbVJaVJaVJcWKcWKcWKeYMcWKcWKeYMeYMeYMeYMg[NeYMeYLeYLeYLeXLbVKeXLeXLeXL`TI`TI^RG`TI`TI`TIbVJ`TI`TI`TIeXLeXLbVJeXLgZNeXLcVJ`TH`TH`THXMBZOC\QE^SGZOCXMBXMBZOCZOCZOC[OC[OC[OC[OC[ODYMBYMBWK@YMBWL@WL@WL@WL@WL@XL@XL@XL@VJ?SH=TH=TH=TI=TI=WK?WK?WK?p^Kq^Kr_Lr_Ls`Lt`MuaMvaMvbNwcNxcNydOzdO{eO{fP|fP}gP~gQmUnUnVoVpWpWqWrXrXsXsYtYx\x\y\z]z]{]{^|^}_}_bbeeefffffffeeeeeeeebbbbbaaaaa~a~`~`}`}`|`|_y]x]x]x\w\w\v\v\v[u[u[t[tZv\v\u\rZrYqYqYs[pXoXlVlVlUkUpYoYoYlVlV}iTzfRyeQxeQ~iU}iUkW|hT{hT}jV}iVkWmYo[q\p\p\o\q]q]p]n[o\o\lZn\}kYziWyhWyhWveUueUudTqbRqaRk]Nk\No`QqbSqbSsdUrcTrcTrcTteVqbTpbSpbSpaSrcUqcUqcUn`Sl^Qi\Oi\OdXKdWKdWKaUIaUI^SG`UI`TIeXLeXLbVJbVJbVJbVJdXLdXLdXLfZMcWKfYMfYMeYMeYMeYMcWKeYMeYMeYMeYMcWKeYLeYLeYLcWK`UI`UI^SG^SG^SG`UI`UI^SG`UIcWJcVJcWJeXLeXLeYLeYLaUIaUIZODXMBXMBZOC\QEZODXMBVK@XMBZOD[OD[OD[OD[OD[OD[ODYMBWK@WL@WL@WL@YNBZNBWL@WL@XL@UJ?VJ?SH=TH=TH=TI=TI=TI=WK?WK?WK?YMAZMAs`Lt`MuaMvaMvbNwcNxcN|fQ}gQ~gQ~hRiRiRjSmUnUnVoVoVpWqWqXrXsXsYtYuYuZy\y]z]{]{^|^|^}_~_~`beefffffffffffeeeeeebbbbbbaaaaa~a~`~`}`}`|`|_y]x]x]x\w\w\v\v\v[u[u[t[t[v\v\u\u\rYoWqYpYs[pXoXlVlVkUkUpYoYlWlV}iTzfRyeRyeQ{gS}iU}iUkV{hT~jV}iV|iV|iU~kWo[nZnZmZo\o[
n[n[n[o\o\lZ{jX{iXziWyhWyhWveUueUrbSrbRo_Pl]Nn_PpaRo`QqbSqbSsdUueVteVteVqbTqbTpbTrdUrdUoaSoaSl_Qj\OgZMgZMdXKdXKbUI_SG_SG_SGaUIcWKeYLcWJbVJbVJbVJdXLdXLdXLfZMdXLfZMfZMfZMfYMcWKcWKcWKfYMeYMeYMcWKeYMcWKcWKaUIaUI^SG^SG\QF\QFaUIaUI^SG\QE\QEcWKeYLeYLeYLcWK^SG_SGZODXMBXMBXMBXMBZODZODXMBVK@XMBXMB[OD[OD[OD[OD[ODYNBWL@WL@WL@WL@WL@ZNBZNBXLAUJ?VJ?VJ?SH=TH=TI=TI=TI=TI=WK?YMAYMAZMAZMAZNAZNAzePzeP{fP|fQ}gQ~gQ~hRiRlTlUmUnUnVoVoVpWqWqWrXrXsXtYtYuZvZy\z]z]{^|^|^}_~_~_``ehhhiiffffffffffeeeeebbbbbbbaaaa~a~`~`}`}`|`|`y]x]x]x]w\w\v\v\v[u[u[t[t[sZv\u\u\rZqYqYpYpYpXoXoXlVkUkUkUoYlWlVkVzfRyfRyeR{gS{gS}iU|hUmX~jVzgT}iV|iV{hU~jWo[nZp\p\o\o[n[n[m[o\mZ|jXygV{iXziWyhWyhWveUscSrbSrbRl]On_Pn_Pm_Po`Ro`QqbSsdUufVueVteVqcTqbTqbTsdUpbSm_Qk]Ph[Nh[NgZMgZMeXKbVJ_TH_SH_SHaUIcWKaUIaUIaUIcWKcWJeXLeXLeXLgZNdXLgZNfZMfZMfZMdXLdXKdXKdXKfYMdWKaUJcWKcWKcWKaUI_SH_SH\QF\QF\QF\QF\QF\QF\QF\QF_SGeYLeYLeYL_SGZODVK@XMBXMBXMBXMBXMBXMB[ODVK@VK@YMBYMB[OD[OD[OD[PDYNBWL@WLAWLAWLAWLAUJ?UJ?XLAXLAXLAVJ?TH=TI=TI=TI=VK?WK?WK?YMAYMAZMAZNAZNAZNAZNA[NA}gQ~gQkTkTlTlUmUnUnVoVoVpWpWqWrXrXsXtYtYuYuZvZw[z]{]{^|^}_}_~_`bcghhiiiiigfffffffffeeeeccbbbbbbaaaa~a~a~`}`}`}`|`y]y]x]x]w\w\v\v\v\u[u[t[t[tZsZu\u\t\qYnWqYpYpXoXoXnXkVkUkUjUlWlVkVkVzfRyeRxeQ{gSzgSkWkWmXxeRzgT|iV|iU{hU{hUq]p\p\p\o\o[n[n[m[mZ|jX|jXygVxgVziXzhWyhWveUscSrcSo`Ql^On_Pn_Pm_Pm^PrbSqbSqbSsdUufVrcTrcTqcTo`RqbTn`Ri[Ni[Nh[Nh[Nh[NeXLcVJ`TH`TH`THbVIbUIaUIaUIaUIaUIaUIcWKcWKeYLg[NeXLgZNgZNgZNdXLdXLdXLdXLdXLdXLbVJbVJdXKdXKaUJaUJ_SH]QF]QFZODZOD]QF]QFZOD]QF]QF_SGcWKcWK_SGZODVKAVKAXMBXMBXMBXMBVKAVKAYMBTI?TI?VKAWLAWLAYNBYNBYNBUJ?WLAUJ?UJ?UJ?UJ?UJ?UJ?XLAXLAVJ?VK?VK?VK?VK?WK?WK?WK?WK?ZMAZNAZNAZNAZNAZNB]PC]PC]PClTlUmUnUnVoVoVpWpWqWrXrXsXsYtYuYuZvZvZw[{]{^|^|^aabbbeehhiiiiiiigffffffffffeecccbbbbbbbaaa~a~a~`}`}`}`|`y]y]x]x]w\w\v\v\v\u[u[t[t[t[sZsZu\t\qYnWnWpYpYoXoXlVkVkUkUjUjUlWlVkVzfRyeRyeR{gS}iUkWkW~jV~jVzgTzgT|iV{hU~jWlYq]p\p\o\o\o[n[n[}kY}kY|jXyhVygVxgVziXzhWyhWscSscSrcSo`Qo`Qo`Qn_PpaRpaRoaRqbSqbSqbSpbSpaSrcToaRqcTl^Pi\Ni\Ni[Ni[Nh[NfYLeYLaTH`THbVJbVJ`THbVJbVIbUIaUIaUIcWKcWKcWKeYLeYLeYLg[NgZN
eXLeXLeXLdXLbVJ`TH`THbVJdXLbVJbVJ_TH[PE[PD[PD[PD[OD]QF]QF[OD[OD_SH_SHaUI[ODXMBTI?VKAVKAVKAVKAVKATI?TI?TJ?RH=TJ?TJ?WLAWLAWLAWLAUJ?RH=SH=SH=SH=SH=SH=SH=SH=VJ?VJ?VK?VK?YMAYMAYMAWK?WK?WK@WK@ZMAZNAZNAZNBZNB]PC]PC]PC]QD^QD`SEnVoVoVpWqWqWrXrXsXsYtYuYuZvZvZw[x[{^`aaabbbceffiiiiiiiiiiggfffffffffeeccccbbbbbbaaa~a~a~a}`}`}`|`y]y]x]x]w]w\v\v\v\u[u[t[t[t[sZsZrZt\t\nWnWnWpYoXoXnXlV~iSkUjU|gS~jU~iTkV}hTyfRyeR{gS}iU}iUkWkW~jV{gTzgTygTkX~kW}jWlYq]p\p\o\o\o[n[~lY{iWzhWzhWyhVygVxgVxgVziWwfUscSscSpaQp`Qo`QqbRqbRpaRpaRpaRrcSqbSqbSn`Qk]Ok]Om_Qj]Oj\Oj\Oi\Ni\NfYLfYLfYLcWJaUHcWJ`TH`TH`THbVJbVJbVJbVIdWKdWKdWKcWKh[NeYLeYLcWKcWK`UI`TIbVJ`TI`TH`TH`TH`TH`TH`TH[PE[PE[PE[PE[PE]RF]RF]RF[PD[PD[PDYNC]RFYNCVLARH=RH=RH=TJ?VLATJ?TJ?RH=WLARH=PFSH>SH>SI>TI>VK?YMAYMAYMAYMAYMAYMAWK@WL@ZNBZNBZNB]PC]PC]PD]PD^QD^QD^QDaSFaSFsYtYtZuZvZv[w[w[x\y\y]z]}_~`bcccdddeehhiillllliiiiiigggfffffffffcccccbbbbbbbaaa~a~a}`}`}`|`y]y]x]x]w]w\v\v\v\u[u[t[t[t[sZsZrZrZt\nWnWnWpYoXoXnXnXkV}hS}hS|gS{gS~iU}iTkVyfRyeR~jV}iU}iU|iUkW~jW{hTzgT}iVygT~kX~kW}jWlYn[p\p\o\o\lZ~lY~kY{iWwfUzhWyhVygVxgVxgVtdTwfVtdSscSscSrcSrbSqbSqbRqaRpaRpaRoaRqbSqbSl^Pl]OfYLdWJfYKeXKj\OgZMgZMgZMgYLfYLaUIcWJaUHaUH`TH`THbVJbVJbVJdXKdXKdXKfZMh[OfYMaUIaUIaUIaUIaUIaUI\QE\PE\PE^RG\PE[PE[PE[PE[PE[PE[PE[PE]RF]RF[PEYNCYNCVLAVLATJ?TJ?RH>RH>RH>PFPFSH>SH>SH>SH>SH>SH>SI>VK?TI>TI>VK@VK@WK@WK@YMAYMAZNBZNBZNBZNB]PC]PD]PD]PD]QD^QD^QD^QDaSFaSFaTFaTFv[v[w[w\x\y\|_}_}_~`~`cccdddeefhiiilllllllllliiiggggfffffffcccccccbbbbbbaaa~a~a}`}`}`|`y]y]x]x]w]w\v\v\v\u\u[t[t[t[sZsZrZrZt\qYnWnWmWoYoXoXnXkVkU|hSyeQxeQ{gS}iT}iT|hT|hT~jV~jU}iU}iUkW~kW~jWxeR}iV|iVyfT~kX~jW}jWlYlYp\p\mZmZlZ~lY{iW{iWwfUzhWyhWygVxgVxgVudTtdTtdTvfUueUrcSrbSrbSqbSqbRpaRpaRpaRoaRl^Pj\NdWJdWJdWJfYLcVJcVIcVIbVIeXKdXKi\NdWKaUIaUIaUIaUHaTHcVJcVJeXLeXLbVJdXKfZMbVI_SH_SH_SG_SG_SG\QF\QE\QE\QE\QE\QEWLBWLBYNCYNCYNCYNCYNCYNCYNCWLAWLATJ@RH>MD:PFPFPFPFSH>SH>SH>SI>SI>VK@VK@TI>TI>TI>TI>WK@WK@WK@WK@WL@ZNBZNB_RE_RE]PD]PD]QD^QD^QD^QD^QDaSFaSFaTFaTFbTFbTFx\|^|_}_}_~`~``ccdddeeehhiijjjjllllllllllligggggffffffcccccccbbbbbbaaa~a~a}a}`}`|`y^y]
x]x]w]w\v\v\v\u\u[t[t[s[sZsZrZrZqZqYqYmWmWmVoXnXnXnXkUjUyeQxeQxdQwdP}iT|hTyeR{hT~jV}iU}iU|iUkW~jW}jV}jV|iV|iV{iV~kX}jW}jWlYlYnZmZmZmZlZ|jX{iWxgUwfUzhWyhWyhVxgVueTudTwfVvfVvfUscSscSrcSrbSqbSqbSqaRpaRm_Pm_PeXKbUIeWJdWJbUHdWJcWJcVJcVJcVIbVIeXK`SG]QE]QE]QE\QE\QE^SG^RG`TH`TH`THbVJdXL`TH`TH_TH_SH_SH_SH]QF\QF\QF\QF\QEZODWMBWMBWMBZNCYNCWLBWLBWLBWLBWLAUJ@UJ@ND:KB9ND:PFPFSH>SI>VK@VK@VK@VK@TI>TI>TI>TI>TI>WK@WK@WL@WL@WL@XL@]PD_RE_RE`SF]QD^QD^QD^QD^QDaSFaTFaTFbTFbTFbTFbTFcUF}_~```acdddeeefhiiijjjjjjjjjjjllllllliigggfffffddcccccccbbbbbbaa~a~a}a}`}`|`y^y]x]x]w]w]v\v\v\u\u[t[t[s[s[sZrZrZqZnWpYmWmWmWlVnXnXnXmX|hS|gSxeQxdQwdQzfR|hTyeRxeR{gT}iV}iU|iU|hUmY~jWzgT}iV|iV|iV{hV~kX}jWzgU|jW|iW{iWmZmZmZlZ{jXxgVxgUwfUzhWyhWyhWveUxgVwgVziXtdTsdTqaRscSrcSrbSrbSqbSqbSn_Qi[MhZMcVIcVI`SGbUHbUHdWJdWJcWJcVJcVJeXK`TH`TG]QF]QE]QE]QE\QE_SGaUIaUHaTHeXLcVJbVJ`TH`TH`TH_TH]RF]QF]QF]QF]QFZODXMBXMBXMBUK@UK@UK@UK@UK@WLBUJ@SH>UJ@PFTI>TI>TI>TI>UI>WL@WL@WL@XL@ZNBZNB]PD]PD`SF`SF`SF`SF^QD^QD_QDaTFaTFbTFbTFbTFbTFcUGcUGcUGaadddeeefhiiijjjjjjjjjjjjjjjjjjllliiiiigfffdddcccccccbbbbbbaa~a~a}a}`}`|`y^y]x]x]w]w]v\v\v\u\u[t[t[s[s[sZrZrZqZnWnWpYmWmWlVlVnXnXmXjU|gS{gSxdQwdQzfR|hTyeRucPxeRweRzgSyfS|hU~kWmY}jWzgT|iVygTyfTxfTxfTzhUzgU|iW{iW{iW}kXmZmZ|jXygVxgVxgUwfUwfUyhWyhWxgWxgVziXziXtdTtdTqaRpaRpaQrcSrbSqbSl]Oi[MfYKaTGaTG`TG`SG`SG`SGdWJdWJdWJfYLcVJcVJ`TH^RF]RF]QF]QE]QE]QEaUIaUIaUIeYLcWJcVJ`TH`TH`TH]RF]RF]RF]RF[OD[ODXMBVKAVK@VK@SI?SI?SI?SI>SI>UK@SI>UK@PF=ND;LB9LB9LB9LB9ND;ND;ND;ND;ND;ND;ND;J@7LB9ND;ND;LB9QFSI>VK@VK@VK@VK@TI>TI>TI>TI>TI>UI>UJ>WL@WL@XL@ZNBZNB]PD]QD]QD^QD`SF`SFaSFaSFaTFaTFbTFbTFbTFbTGbUGcUGcUGcUGdUGdUGdeeeffiiijjjjjjjjjjjjjjjjjjjjjjjigggiiiiidddddcccccccbbbbbaa~a~a}a}`}`|`y^x]x]x]w]w]v\v\u\u\u[t[t[s[s[rZrZrZqZnWnWmWmWmWlVlVnXnXmXjUjU{gSxdQwdQzfSyfR|hTxeRubPweRwdRwdQ|iU{hU{hU{hUweSweRygTyfTxfTxfTxfTzhUygU|iW{iW{iWziW}kXmZ|jXygVxgVxgVziWzhWyhWyhWxhW{iXziXwgVtdTqbRqaRpaRpaRpaQj\Nj\Mj[MdWIdWIaTGaTGaTG`TG`SG`SGbUHbUHdWJdWJcWJaTHaTH^RF^RF]RF]QF]QF_SG_SGaUIcWKcWJcWJaUH`TH`TH^RF^RF^RF[PDVLAYNCVKATI?TI?SI?SI?SI?SI?QG=QG=S
I?SI>QG=QG=QG=LC9LC9LB9J@7LB9ND;ND;ND;ND;ND;ND;J@7LB9NE;LC9LC9LC9LC9OE;SI>TI>TI>VK@VK@VK@TI>TI>TI>TI>UI>UJ>UJ>UJ>UJ?XL@ZNBZNB]PD]QD]QD^QD^QD^QDaSFaTFaTFdVHdVHdVHbTGbTGcUGcUGcUGcUGdUGdVGgXIgXIhhkklllllllmjjjjjjjjjjjjjjjjjjjjgggggggddffdddccccccbbbbbba~a~a}a}a}`|`y^x]x]x]w]w]v\v\u\u\u[t[t[s[s[rZrZqZqZnWnWmWoYlWlVlVkVnXkVjUjU~jU{gSzfSzfSyfRyfRxeRubPweRwdRzgSyfSyfSxfS{hUweSweSweRvdRyfTxfTxfTwfT}jWygU|iW{iW{iWzhW}kXzhVyhVygV{iX{iXziWwfUyhWyhW{jY{jXxgVwgVtdTqbRqbRn_Pn_Pk\NeXJeWJbUHbUHdWIaTHaTGaTGaTG`TG`SGbUIdWJbUHaUHaUH_RF^RF^RF^RF^RF]RF`TG_SG_SGdWKdWKdWKaUIaUI\PE\PE\PEWLATJ?WLATJ?TJ?TI?TI?TI?TI?QG=QG=OE;OE;OE;LC9QG=OE;QG=LC9LC9G?6JA7JA7LC9NE;NE;NE;NE;NE;JA7JA7LC9LC9H?6JA7LC9MC9OE;TI>TI>VK@VK@TI>TI>TI>TI>UJ>UJ>UJ?UJ?UJ?XL@ZNBZNB[NB]QD]QD^QD^QD^QD^QD^QDaTFdVHdVHdVHeWHeWIcUGcUGcUGcUGdUGdVGgXIgXIj[KhYIllllllmmmmmmmmmmjjjjjjjjjjjjjjjgggggeddddddddaacccccbbbbba~a~a}a}a}`|`y^x^x]x]w]w]v\v\u\u\t[t[t[s[s[rZrZqZqZnWmWmWmWlWlVlVkVkVkVjV~jU~jUlW}iUzfSyfRyfRucPxeRweRwdRzgTyfSyfSxfSxeSxeSweSweSvdRvdRxfTxfTxfTweTzhUygU|iW{iW{iWziWzhWwfTyhVygV{iX{iXziXwfUwfU|jY{jY{jYxgWwgVrbRo`Pl]Nl]Ni[LfXJfXJcUHbUHbUH_SF_RF_RF^RF^RE^REcVIbVIbUIbUIbUH_SG_SF\PD\PD\PD[PD^RF[PD[OD]QF_SG_SG_SGZOCZOCWLBWLAUJ@WLATJ?TJ?TJ?TJ?TJ?TJ?RG=QG=OE;OE;MC:OE;OE;LC9OE;QG=LC9JA8H?6JA8JA8LC9OE;OE;LC9LC9H?6E=4E=4JA7H?6JA7MC9MC9MC9MC9OE;RG=RG=RG=RG=RG=RG=UJ?UJ?UJ?UJ?UJ?XL@XLAZNB[OB]QD]QD^QD^QD^QD^QE_RE_REaTGdVHdVHeWIeWIeWIeWIfWIdUGdVGdVGgXIgXIj[Kj[KhYJhYJoooommmmmmmmmmmmmmmjjjjjjjjjjjggggeeeddddddddccaa~a~`~`}`}`|`|_|_~a~a}a}a|`y^y^x^x]w]w]w]v\v\u\u\t[t[s[s[r[rZrZqZqZnWmWmWmWlWlVkVkV}hT|hTjV~jU~iUlWkWkWyfSyeRxeRubPtbPzgTygTyfTyfSxfSxfSxeStcQtbQtbQsbPvdRxfTxfTwfTweTweT|jW|iW{iW{iWziWzhWwfUyhVygVxgVxgVudTudTwfUyhWyhWscSscSrcSo`Ql^Ol]OgYKfXKfXJcVHcVHcUHbUH_SF_SF_RF_RF^RFaTG^RE^QE`SG`SG_SG]QE\QEZNCZNCYNC\PDYNB[PD]RF]RF_SG]QFZODXMBXMBUJ@UJ@UJ@UJ@UJ@UJ?TJ?TJ?RH>RH=OEPFPF5I@7I@7H?6H?6H?6F=5D;3A91A91F=4F=4H?6H?6H?6F=4A91A91D;3D;3F=4F=4H?6I?6I?6I?6F=4F=4G=4KB8KB8LB8LB8LB8QFVK?YMAYMA\OC\PC\PC\PC]PCbTGbUGbUGcUGcUGcUGcVHfXIgXJgXJj[Lj[Lj
[Lm^Nn^Nl\Ll\Ll\LjZKj[KhYIhYIiYIiYIqqrrrrrrrrrrrrrrrqqqoooooljjjjjgggggggffffffcccccbbbb~b~az_z_y^v\v\u[u[t[t[s[sZrZrZqZqYpYpYoYoXoXnXkVkUjUjU~iU~iUkV}hT|hTyeR{gT{gT}iU}iU}iUkWkW~kW~jW~jV}jVlXzgTwdRvdRvdRyfTxfTxfSxfSweSweSq`Oq`OsbQsbQsbQsbQraPudRwfTwfTweTveTveTygVxgUudSudSudStdStcSqaQscSscRscRrbRrbRrbRqbRqaRqaRn_PhZLeWJeWIeWIdWIdWIdVIdVIcVIcVIcVHbUH`SF_SF_SF_RF_RF^RF^RF^RF^REXMBXMBZOC]QEZOCZNCZNCYNCWLAWLATI?VKAVKAVK@SI?QG=NE;NE;ND;ND;ND;ND:KB9G>5F>5I@7F>5F>5I@7I@7F=5D;3A91A91D;3D;3D;3F=5H?6D;3D;3A91D;3D;3F=4F=4I?6I?6I?6F=4F=4G>4G>4LB8LB8LB8LB8LB8OD:OD:TI>TI>VK?WK?YMAYMAZMA\PC\PC]PC_RE`SE`SEcUGcUGcUHdVHdVHgXJgXJj[Lj[Lk[Lm^Nn^Nl\Ll\Ll\Mm]Mj[KhYIiYIiYJiZJjZJjZJooooooooooooooooooooooooolllljjjjgggggffffffeeeeebbb~b~a}a}ay^v\v\u\u[t[t[s[sZrZrZqZqYpYpYoYoXnXnXkUjUjU~jU~iUzfRzfRyfRyeRxeRxeR}iV}iU|iU|iU|hU~kW~jW~jW}jVlXlX|iV|iVsbPvdRudRxfTxfTxfSweSweSq`Oq`Oq`OsbQsbQsbQudSudRudRwfTwfTweTveTveTygVxgVsbRudSwfUtdSqaQtcSscSscSscRp`Pp`Po`Po_Pl]Nl]Nn_PfXJeXJeWJeWIeWIdWIdWIfYKfXKcVIcVI`SG`SF`SF_SF_SF_RF\PDYNBYMBYMBYMB[OC[OCZOCZOCZOCZNCZNCWLATJ?TJ?RG=TI?OE;OE;OE;NE;NE;NE;LB9LB9I@7D<3D<3G>5G>5F>5I@7I@7I@7D;3D;3A91D;3A91A91H?6F=5D;3D;3A91D;3D;3F=5F=5I@6I@6F>5G>5G>5G>5G>5I@6LB8LB8LB8OD:OD:OD:OE:RG5G>5G>5I@7I@7I@7D<3D<3?70B91B91D;3B91B91D;3D;3B91D;3D;3F>5F>5I@6G>5G>5G>5G>5G>5G>5I@6LB8LB8LB8OD:OE:OE:OE:RGZNBZNBZNBZNB]PD]PD`SF`SF`SFaSFaTFdVHdVHgYJgYJj[Lk[Lk\Ln^Nn^Nl\MjZKj[Kk[Kk[Kl\Ll\Ll\Lm\LjZJp_Nq_Nq`No^Moppppppppppppppooooooommmmllllllllliiifffffeeeeeddddcc|a{`x^x^w]w]v]v]rZrZqZqYpYpYoYoXnXkVjUjU~jUzgSzfSyfRyfRxeRxeRweRwdRwdQvdQvcQxfS{hU{hUzhUzgU}jW|iVlXlXkX{iV{iV{hVxfTucRucRweTweSweSweSq`Oq`OsbQsbQsbQvdSudSudSudSudStcRwfTweTveTveTscRsbRsbRrbRo_PrbQtdSqaQtcSscSpaQp`Qp`Qj[Mj[Lj[LgXJdVHaTFcVHcUHcUHeWJeWIdWIdWI_RE^RE^RE^QE^QE]QE]QEZNC]PD\PDZNBWL@YNB[PD[OD]QE[OCXMBUJ@SH>SH>PF6B:2B:2E<4G>5G>5I@7I@7I@7I@7D<3B:1B:1B:1B:1?70B91B91D<3?7/B91D<3D<3D<3G>5I@7G>5G>5G>5G>5G>5G>5J@7J@7LB8OD:OE:OE:OE:RGUJ>UJ>ZNB[NB[OB^QD^QD^QD_QD_RDbTFgYJgYJhYJk[Lk\Ln^Nn^Nl]Mj[Kj[Kk[Kk[Ll\Ll\Ll\Lp_Np_Np_Nn]Mo^Mo^Mo^Mp_M
pppppppppppppppppoooommmmmmllllllllkkkihfffeeeedddddc|a{a{`z`z`y_v]u]u\t\t\s[r[oYoYnXkVkVjU~jUzgSzfSyfRyeRxeRxeRwdRwdRyfSvdQucQucQucQweSweSzgUzgU|iVlXkX~kX~kX{iV{hV{hV{hVzhVucRtcRtcRweSweSweStbQsbQsbQsbQsbQudSudSudSudSudStcStcRwfTweTveTscRsbRsbRp`Po`PrbRrbQqaQqaQn_OqaQpaQk\Mj\Mj[MgYKgYJdVHaTFaSFcVHeXJeWJeWJdWIbUG_RE^RE^RE^RE^QE[OC[OC]QEZNCZNCZNBYNB\PD^RF[PDTI>SI>SI>SH>SH>PF5G>5G>5I@7I@7I@7I@7B:2B:2B:2D<3B:1?80?80B:1D<3?80?80D<3D<3D<3G>5G>5G>5G>5G>5G>5G>5G>5J@7J@7H>5LC9MC9MC9OE:OE;PE;PE;PE;SH=UJ>XL@XL@YL@[OB\OB^QD_QD_RD_REbTGbUGeWIfWIiZKn^Nn_OjZKj[Kk[Kk[Lk[Ll\Ll\Lm\Lm]Lm]Lq`Nq`Or`Oo^Mm\Km\Ln]Ln]Lpppppppppppppppppppommmmmmmllllllllkkkkkhheeeeedddd|a|a{a{`z`z`y_x_x_w^t\s\s[r[q[qZnXmXmX~jUzgSyfSyfRxeRxeRwdRwdRvdQvdQxfSxfSucQtcQtbQweSvdSvdRvdRygT~kX~kX~kX~kX{hV{hVzhVzhVzhVzhVweTtcRtcRygUygUveSsbQsbQsbQsbQsbQudSudSudSudSudStdStcStcRweTtcRscRp`PsbRp`PrbRrbRrbRrbQo_On_On_OhZKhZKhYKhYKgYKgYKdVIdVHcVHcVHcUHcUHeWJbUHbUH_RF_RE^RE\OC[OC^QE[OC[OC]QEbUHWLAWLATI?RG=TI>TI>SI>SI>SI>PF6G>6G>6I@7I@7G>5G>5D<3B:2B:2B:2B:2@80@80D<3D<3B:1B:1B:1B:1D<3G>5G>5G>5G>5G>5G>5E<3E<3G>5H>5J@7MC9MC9OE;OE;PE;PE;PE;PF;SH=VJ?XL@XMAYMAYMAYMAZMA_RE_REbTGbUGcUGcUGfWIl\MiZKjZKhYIk[Kk[Lk\Ll\Ll\Lm]Lm]Lp_Nq`Oo^Mo^Mp^Mp_Mn]Ln]Lo]Lo^Lm\Jpppppppppppppppppppmmmmmmmmmlllllllkkkkkjjhgeedddd|a|a{a{`z`y`y_x_w_w^v^v^r[r[q[nXmXmX~iU}iU|hU{hT{hTzgTzgTyfTvdQucQucQweSweStbQsbQsbQsbPvdRudRudRudRxfT}kX}kX}kX}kXzhVzhVweTzhVzhVzhVyhVygVveTygVveSsbQsbQsbQsbQsbQsbQudSudSudSudSudStdStcStcStcRtcRq`Pp`Pp`PsbRrbRrbRrbRo_Po_Pn_OiZKhZKhZKeWIbUGbUGdWIgYKdVIaTGaTF`SFcUH`SF`SF_SF_RFZNBYNBYMBVK@YMAXMA]QE]QEZNCUJ?RH=RG=RG=RG=TI>TI>QG6JA7G>6G>5G>5E<4B:2B:2B:2B:2B:2@80D<3B:2B:2B:2B:2B:2B:2E<3G>5G>5E<3E<3E<3E<3E<3H>5H>5JA7JA7OE;OE;PE;PE;PE;PF;PF;VJ?VJ?VJ?YMAYMAYMAZMAZMA_RE`RE`RE`SEaSEcUGfXIgXIj[KhYJhYJhYJiYJl\Ll\LjZKk[Kn]Mn^Mo^Mo^Mp_MsaPsbPtbPr`No^Lp^Mp_MtaOppppppppppppppppppmmmmmmmmmmllllllkkkkkkjjjggeddd|a|a{az`z`y`x_x_w_v^v^u^t]q[nXmX~jV}iU}iU|hU{hTzgTygTyfTxfTxfSweSweSveSygUygUsbPraPraPraPraPucRtcRtcRwfTweTzhV}jX|jXzhVzhVweTyhVyhV|jX|jX|j
X|jXygVsbRveSsbQsbQsbQsbQsbQsbQrbQudSudSudSudStdStdStcSqaQqaQqaPn^Np`PscRsbRrbRo`Po_Po_Pl]NiZLiZKfXIeWIeWIhYKgYKgYKdVIaTG^QE^QD[OB[OB]QDZNBWL@WL@WK@WK@VK@YMA[OC[OCXMASH=PF;PE;PE;OE;OE;OE;OE;OE;ND:I@7I@7I@7I@6F=5D;3C;3C;3C;2A91E=4H?6H?6H?6JA8G?6G>6E<4E<4B:2B:2@80B:2B:2E<4B:2B:2B:2B:2B:2B:2@80E<3B:2B:2E<3E<3E<3E<3E<3H>5JA7JA7MC9OE;PE;PE;PE;PF;SH=VJ?VJ?VJ?VK?YMAYMAZMAZMA]PC]PC`RE`SEaSE^QDaSFbTFeVHeVHcTFfWHfWHgXHgXIhXIk[Kn]Mq`Or`OraOsaPsaPtbPtbPr`NraOp^Mq_Mn]Ko]Ko]Lpppppppppppppppppnnmmmmmmmmmllllllkkkkkjjjjiigfd|a{a{az`z`y`x_w_w_v^u^u^q[p[mX~jV}iU|iU{hUzgTygTyfTxfTweSweSvdSvdSudSudRucRwfTtcRtcRtcRq`Pq`Pq`PscRscRveTveTveTygVygVygVygVveTveT|jX|jX|jX|jX|jX|jXygVveTsbRsbRsbQsbQsbQsbQsbQrbQudSudSudSudSudStdStdSqaQqaQk\Mn^Op`PscRsbRp`Po`Po`Pl]Ni[LiZLiZLfXJhZKhZKhYKgYKgYKdWIaTGaTG^QE^QD[OBXL@ZNBXL@WL@WL@WK@\PD[OCYMAVK?SH=SH=PF;PF;PE;OE;OE;LC9OE;LB9LB9I@7I@7G>5D;3D;3D;3D;3A91A91A91C;2E=4E=4E=4E=4E<4G?6E<4E<4B:2@80B:2B:2E<4E<4B:2B:2B:2B:2@80>6.B:2B:2C:2C:2C:2E<4E<4E<4H?5JA7MC9MC9MC9PE;PF;PF;SH=SH=VJ?VJ?VK?VK?WK?ZMA\PC]PC]PC]PC`SE^QD^QDaSFbTFbTF`RDeWHfWHfWHgXIgXIj[Kn]Mn^Mo^MraOraPsaPsbPtbPr`Nr`OsaOp_MkZIo]Ko]Lp^Lm\JkZHppqqqqqpppppppppnnnnmmmmmmmmmlllllkkkkkjjjjiiidd{a{az`y`y`x_w_v_v^r\q[p[mXlX|iU{hUzgUygTucRtcRsbQudSudStcStcRscRsbRsbRsbRrbRrbRrbRrbRrbRrbRrbRrbRrbRudTudTudTueTsbRueTveTveTxgV{iX{iX{jX{jX{jX{jX{jXygVygVsbRveTsbRsbRsbQsbQsbQrbQrbQudSudSudSudStdSraQqaQqaQn^On^On^On^OscRp`Pp`Po`Pj[Li[Li[LiZLiZLhZKhZKhZKhYKgYKdWIaTG\OCYMAYMA[OC[OB[NBZNBWL@\PD\PDWK@VK@VK@SH>SH>SH=PF;PF;PE;MC9MC9JA7JA7LC9J@7G>5G>5B91B91A91A91?7/A91A91C;3C;2A91E=4C;2E=4E=4C:2C:2>6.>6.@80B:2B:2B:2B:2B:2@80@80>6.>6.C:2C:2C:2C:2C:2C:2C:2E=4JA7MC9MC9MC9MC9NC9PF;SH=SH=SH=VJ?VK?VK?WK?WK?ZNA]PC]PC[NB[NB[OB^QDaTFbTF_RDbTFcUFfWHfWIjZKjZKj[Kn]Mn^Mo^Mo^Np_NsaPtbPq`Nr`Op^Mm\Kn]Kn]Lr`No^Lm\JhWFhWGlZIl[Isssqqqqqqqqqqppnnnnnnmmmmmmmmlllllkkkkjjjjiiifd~c}cz`y`x`w_w_v^r\q[nYmX}jVyfSweSvdRucRtcRrbQn_Om^Om]Nl]NiZLhZLhZKhYKhYKj\MhYKhYKk\Mk\Mk\Mk\Mk\MqaQtdSqaQo_Oo_Oo_Oo_Oo_OrbRxgVxgVxgV{iX{iX{iX{iX{iX{iXxgVxgVxgVveTsbRveTsbRs
bRsbRsbRsbQrbQudSudSudSudSudSraQqaQo_On_On^On^On^Ok\Mm^Nm^NgYJj[Ll]Ni[LiZLiZLfXJfXJeWIeWIeWIbTG_RE\PC\OC[OC[OC[OC[OB]QD]PDWL@WL@WK@VK@TI>SI>SH>SH>PF5E<3B:1B:1B:1B91B91A91A91A91D;3C;3A91A91A91C;2C;2C;2@80@80>6/;4->6/@80C:2C:2@80C:2>6.>6.;4-;4-@80C:2C:2C:2C:2C:2C;2C;2KA7MC9MC9MC9NC9PF;SH=SH=SH=SH=VK?VK?WK?YMAZNAZNAZNB[NB[NB[OB\OBaTF_RD_RDbTF`REfWHiZKjZKjZKm]Mn]Mn^Mo^No_Np_Nq_Nq`No^Mo^Mm\Kn]Kn]Lr`No^Lm\JhWFhWGlZIl[Im[IjYGkYHsssssssqqqqqqqnnnnnnnnmmmmmmmlllllkkkkjjjjiifff}c|cy`x`w_v_s\oZkW~jV}iVxfSveSraPn^Nm]NiZLfXJeWJ_RE^RE^QE_SF_RFfYLeXKbUI_SGdWJeXJhZLhZLk]Ol]Ol]Ol^Om^Om^OpaQn^On_On_On_OqaQtdStdSwfVziXziX{iX{iX{iX{iXxgVxgVxgVxgVveTveTsbRsbRueTueTsbRsbRrbRrbRudSudSudSrbQrbQraQo_On_On_On_Ok\Mk\Mk\Mj\Mj[Lj[LgYJgXJgXJfXJfXJfXJeXJcUHeWIeWI_RE\PC\OC\OC[OC^QE[OCZNBZNBWL@WL@WK@TI>TI>TI>SH>SH>PF6/>6/<4-@80@80@80>6/;4-C:2@80>6.<4-<4-@80@80@80A80C;2C;2F=4F=4KA8KA8KA8NC9PF;PF;QF;SH=SH=VK?VK?YMAYMAWK@WL@XL@XL@[NBYL@YM@\OB\OB_RDbTF`REaSEaSEgXIgXIk[Kk[Kl\Ll\Lm\LkZJk[KiYIl\Km\Km\Kn]Lo]Lo^LjYHjYIkZIn]Kl[IjXGjYHhWFhWFiWFiXFqtttttttssqqqnnnnnnnnnnmmmmmmlllllkkkkjjjiifffe|c{bx`t]pZmXlW~jWygTucRp`Op`OgXIgYJeVHn^Ok\MqaQj[Mi[MhZLgZL`TGbUI^RFYNCRH>SI?UK@]QF]QF`THi[Ni\Nj\Nk]Nk]On`Qo`Qo`Qm^OpaQpaQqaQqaQtdSwfUziXziXziXziX{iXxgVxgVxgVxgVxgVueTueTveTveTsbRveTueTueTueTrbRrbRudTudTrbQudSrbQo_Oo_On_Ok\Mk\Mn^OhYKhYKj\MgYKgYJgYJgXJfXJcVHfXJfXJeWJeWIbUGbUG_RE_RE\OC[OC[OC[OC[OCZNBWL@WL@TI>TI>TI>TI>SI>QF7/<4-<4-A91>6/>6/>6/>6/>6/C;2@80>6/>6/>6/@80A80A80A90A90F=4F=4H?6KA8KA8ND:ND:ND:PF;QFUJ>VJ>VJ>YL@YM@YM@ZMAbTFbTF`SEaSEaSEdVGj[Kk[Kk[Ll\Ll\LjZJk[JiYIiYIjYIgWGhXHiXHiYHjYHjYIn\Kn]Ko]Kl[IgVFhWFhWFiWFiXFjXGjXGjYGtttvvtttttttqqoonnnnnnnnmmmmmmllllkkkkjjjigffe|c{bt]q[mXlX{hUzgUsbPm]MgXIk\Lo_OygVm[|jXo]o]ziXyhX{jYziYyiXo`Qj\OfYL_SGZODXNCVLBOF=SJ@XND^SGdXLfYLi\Nj\Nm_Pn_Qn_QrbSo`Qp`QpaQpaQqaQyhWyhXziXziXziXxgVxgVxgVxgVxgVxgVueTueTveTveTveTveTsbRveTueTueTueTudTxgVudTrbRrbRo_Oo_Oo_On_On_Ok\MhZKhYKeWIj[Mj[MgYJgYJgXJfXJfXJfXJeXJcUHeWJbUG\PCYMA\OC\OC[OC[OC[OCZNCZNBUJ>TI>TI>TI>TI>LB8LB8I?6I?6F=4F=4C;2C:2C:2C:2@80@80@80=
5.:3,:3,=5.?7/A91A91A91A91A91<5-<5-<4->7/<4-<4->6/>6/>6/A91A90<4->6/A90C;2C;2A90A90C;2C;2C;2H?6MD:ND:ND:ND:ND:ND:LB8OD:TI>TI>TI>UI>UI>UJ>UJ>VJ>VJ>VJ?YMA_RD_RE`RE`SEaSEaSEdVGgXIk[Kk[LiZJjZJjZJhXIiYIiYIgWGhXHeVFcTDdTDjYIkZIn]Ko]KiXGjYHkYHhWFiWFiXFjXGjXGdTCeTCeTCfUCrrtttwvvvttqqqqqqqnnnnnnnmmmmmllllkkkjjjigffe{cu^t]kV}jV|iVxfTo^Np_Oq`P|iWo\uas_p]r_wcwcvcn\{jYziYyiXxhXvgWrcTm_QnaSm_R`TI^SHVLBQH?RI@TKA]RGfZMeXLfYLi\Nj\NpaRqbSn`Qo`Qo`Qp`QpaQvfUyhXyhXwfVwfVwgVxgVxgVxgVxgVxgVueTueTveTveTveTveTveTveTveTxgVueTueTrbRudTudTudTudTrbRrbQqaQn_Ok\Mk\Mk\MeWIeWIhYKgYKgYKgYJfXJi[LiZLcUHcUH`SF_SFbUG_RE\PC\OC[OC[OC[OCZOCZNCWLAUI>RG7/<5-<5->7/<4-<4-<4-A91A91<4-<4-A91C;2C;2F=4F=4C;2C;2F=4KA8KB8KB8KB8LB8LB8ND:LB8LB8MB8RGUI>UJ>UJ>VJ>VJ>TH=\OB_RD_RE`RE^PCaSEdUGdVGeVHhYJiYJiZJjZJhXIhYIiYIgWGgWHbSDcSDcTDdTEjZIkZIlZIfVFgVFjYHkZHlZHiXGjXGgVEeTCeTCeTCfUDfUDjXFjXFppprtttwwwwttrrqqqqqqqpnnnmmmmllllkkkjjjggfyaxar\nYkW}jWzhUxfTvdS~kXt`u`u`uauar_r^r^n\p^|jY{jYpaQo`Qn_Pm^PrcTm_Qo`Rm_Rn`Si]Pj^Q]RHXNDPH?TKBQH?aVKeYMgZMh[Nk^Pl^Pm_Pn_Ql]Ol^Oo`QrcSscSscTtdTwfVwfVwgVwgVxgVxgVxgVxgVueTveTveTveTveTveTveTveTveTveTveTveTueTxgV{iXxgVxgVrbRrbRrbRiZKiZKiZKfWIeWIeWIhYKgYKgYKgYKgYJfXJaSF`SF`SFcUHbUHbUHbUGaTG\PC[OC[OC[OC[OCXLAUJ?UJ?MC9J@7J@7I@7I@6I@6F=4F=4F=4A80@80>6.>6.@80=6.=6.;3,=5.=5.?70?7/?7/?7/?7/<5-:3,:3+:3+:3+<5-<5->7/>7/>7/>7/<5-70)<5-A91C;2C;2C;2F=4F=4F=4KB8I@6KB8LB8LB8LB8LB8LB8MB8OE:RGVJ?YMAYMA\OC_RE`RE]PCaSEdUGbTFbTFcTFfWHiZJgXHhXIhYIiYIaRDeUFeVFcTDdTEgWGhWGhXGfVFgVFgWFhWFiXGlZIjXGdTCeTCeTCfUDfUDgUDgVDkYFkYGo\Ir_Knnnppppruwwwtttttqqqqqqpppppmmmlllkkkjjg~ex`w_v_oZoZlXlYnZr]v`ycwbwbxbu`r^o\~kYzhWveTudTwgVyiXrbStdUm^Pl]OqbSl^PqcToaSn`SrdVpcUk_RcXM_TJWNDOG?PH?SJAaVKg[Oi\Oj]Pk^Pl^Pj\Oh[Mi[Mi[Mm^Om^OpaRsdTtdTtdTtdTwgVxgVxgVxgVxgVueTveTveTveTveTveTveTveTveTveTygV{jXveTveTueTueTrbRo`Po_Pi[LiZLiZLfXIfXIfWIeWIeWIeWIbTGbTGdVIdVHaTFcVHcVHcUHbUHbUHbUHaTGaTG\OC[OC[OCUJ?SH=RH=MC9MC9LC9J@7I@7I@7D;3D;3D;2A90A90>6.>6.@80@80;4,=6.=6.=5.?80?70?70?70=5.<5.:3,:3,:3,:3,<5-<5-?7/<5-<5-:3+<5-:2+?7/?7/?7/D;3D;3D;3D;3I@6KB8ND:LB8ND:
ND:LB8LB8LB8MC9OE:PE:RGLE=WNDcXMh[Oi\Pj]Pi\Nj\OhZMh[Ml]Ol^Om^OpaRpaRqaRqbRqbRrbRrbRrbRueTueTscRscRscSscSscSscSscSveUveUveUyhWveTscRscRscRp`Pp`Pp`Pm]Nj[Li[LfXJfXJfXJcUGcUGcUGbUGbUGbTGbTGaTGaTGaTFcVHcUHbUHbUHbUHbUHaTG^REYMASH=SH=SH=MC9MC9JA7J@7J@7J@7D<3D;3A91A91A91A90C;2C;2C:2@80@80=6.@80@80?80?80?70=5.=5.=5.:3,:3,:3,<5-?7/?7/?7/:3+<5-?7/?7/A91A91A91A91D;3F=5F=5I@6I@6I@6I@6I@7LB8LB8LB9MC9OE:PE;PE;PE;UJ?SH=VJ?WK?YMAZMA]PC`SE^QC^QD_QDbTFcTFcUFdUGgXIhXIfWGdUFdUFeVFcTDdTEdUEeUEfVFcTDdTDhWFhWGiXGdSCdTCeTDeUDiXFfUDdSBdSChVElYGo\Is_Ls`Lt`Lq]Jt`LxcOuaMxxsssqqqqnqppuuurrrrrqqonnnnmmmmlllkif|c{c{ch~fimuwxtŧұǡ޶״Ưâɰɯɱϴɧp__TIg[Ok_Sl`TpcWk_Tk`T[RIXPGLE=NF>dYNdXMh\OgZNh[Ni\Nj\Oh[Mk]Ol^Om^PpaRpaRqaRqbRqbRrbRrbRrbRrbRscSp`Qp`Qp`QscSveUveUveUveUveUyhWyhWscSscSscSp`Pp`Pp`Pp`PgYJdVHgXJdVHdVHcVHcUHcUGcUGbUGbUGeWIbTGaTGaTGaTGaSF`SF`SF]QD]PD\PDYNBWK@VK@SH=PF;KA7KA7KA7JA7JA7J@7D<3D<3D;3D;3A91A91C;2C;2F=4C;2C:2C:2E<4@80@80@80?80=5.=5.=5.:3,:3,:3,:3,?7/A91F=5D;3?7/A91A91A91D;3D;3D;3D;3F>5D;3G>5I@7I@7I@7J@7J@7LC9MC9MC9PE;PE;PE;SH=SH=VJ?VK?\OC\PC]PC]PC^QC^QD_QDbTFcTF`RDaSEdVGeVGcTEdUFdUFbSDcTEdTEdUEeUFcSDdTDdTDeUEiXGdSCdTCeTDeUDfUDgVDdSCeTChWElZGlZHp]Jm[Hn[Hn[Hn[HuaMvaMvaMvbMzeOá~Ţà}{Ÿ{yá~yvvsqqsssuuuutrrqqqnnnmmmmjjiiknqqy}xϣītzʪѰǡذѯղξǨg\Pk_SocWnbVk`Tl`Uf\RYPHLE>PH@]TJg\Pg[OfZNh[Ni\Nj\OhZMk]Ol^Ol^PpaRpaRqaRqbRqbRrbRrbRrbSscSscSscSscSscSscSscStcStcStcStcSvfUtcSqaQqaQn^On^Om^Om^Nj[LeVHdVHaTFaTFaSFaSFaSF`SFfXJeWIeWIbUG_RE_RE_RE^QE^QE^QD]QD]QDZNBZNBWK@WK@VK@SI>QF5F>5I@7F>5D;3A91I@7I@7D;3D;3D;3B91B91D<3I@7I@7I@7J@7J@7J@7J@7MC9MC9PE;SH=UJ?SH=VJ?YMA\OC\PC]PC]PC[NB^QD_QD_RDbTF`RE^QC_QCbTEcTFdUFeUFbSEcTEaRCeUEfVFcTDdTDeUEfUE`QAdTDbRBbRBcSB`P@aQAbQAeTCiWFiXFm[Hn[Hn[Ho\Io\IvaMvbMwbMwbNwbN{eP{eP{fP }ġ~ƣȤʥǣzϩ֯ذڱݴ ~ 
}{xvxzzwwwwvvvvusppprtww}äĥǣǡѲۻβlyήཚ˥ׯر۵׶ȺѾf\QnbVi^SocXodYdZQMF?JD=YQHdYNf[OfZNg[Nh[Ni\Oj]Ok]Ol^Ol^PpaRpaRqaRqbRqbRrbSrbSueUueUveUveUscSscStcStcStcStcStcSwfUwfUtcSqaQqaQn^On^On^Ok\MhYKhYJbTFbTFaTFaTFaSFdVHcVHcUHcUHcUG`RE_RE_RE_RE^RE^QE^QE[OC[NBZNBWL@WL@WK@TI>QF5G>5G>5G>5G>5I@7KB8KB8I?6H?6F=4F=4H?6MC:MC:JA8G>6G>6E<4D<4D<4B:2D<3D<3G>5G>5I@7F>5F>5D<3KB9I@7I@7B91B91B91B:1B:1I@7I@7J@7J@7J@7JA7JA7MC9RG=XLAXLAVJ?VJ?VK?YMAZMA]PC]PC[NB[OB_QD_RD]PB^PC^QC_QCbTEcTFdUFbSDcTEcTE^PAbSDcSDaRBbRCbSC]N?^O?bRBbRB`P@aQAaQAeTCfTCcRBjXFn[Hn[Io\Io\Io\IvbNs_Lt`Lt`LxcN{fP|fP|fQ|fQ|fQydNzà~ŢƣȤʦկӬծͨٱڲ滑轒庐•ș˜ţ¡ɧȦ˨ǥƤĤǦǦά˪ͬ⽙ǫϣ濘޹໖̥ѪڹٻͲqt~|ƨطšҬ̨Ҳ¸ŸŷǛ_VLlaVj_TodYg]SZQIOHAVNFbXNcXMeYNgZNh[Ni\Oj\Ok]Ol^Pl^PpaRpaRqaRqbRtdUueUueUueUveUveUvfUscStcStdStdStdStdSwfUwfUtdStdSqaQn_On_On^On^OhYKhYKhYKeWIbTFdVHdVHdVHdVHcVHcVHcUH`SF`SF_RE_RE_RE_RE^RE[OC[OC[OCXL@WL@TI>TI>QG5G>5J@7I@7ND;ND:KB8KB8H?6H?6MD:MC:JA8JA8H?6G?6G>6G>6G>6G>6D<4D<4G>5G>5I@7G>5G>5G>5LB9LB9I@7G>5B:1B:1B:1E<3J@7J@7J@7J@7MC9MC9MC9RG=WLAXLAVJ?VJ?VK?YMAYMAZNAXL@XL@[OB\OB\OBZMA[NA^QC_QC]OB^PBaSD_QC`QC^PAbSDbSD`QBaRCbRC]N?^O@^O@bRB`P@aQAdTCeTCcRBcRBjXFn[In\Io\Io\Ip]It`Lt`Lt`Lu`L|fQ|fQ|gQ|gQ}gQ}gQzdOzdO}gQ}gQtx{ŢǣͩԮѫӭծҬ⹏亐轒꿓˜̝ѡץܩݩު߫߫ޫݭҭ̨ױẒẒ๒ܷٵ忙լٰ̨Ӵгչs|hmr~ѲġȥڹƸͿ˗if\Rj`UoeZbYPd[RTMEUMF_ULbWMg[OfZNh[Nk^Qj\Ok]Ok^Pl^PpaRsdTsdUtdUteUueUueUveUvfUvfUvfUwfUtdStdStdTtdTwfVwfVwfVtdTtdSo_On_On_On_Ok\MhZKhZKhYKhYKhYKeWIdVHdVHdVHdVHcVH`SF`SF`SF`SF_RE_RE\PC\OC[OC[OCXLAXLAUJ>UI>RG6G>6G>6LC9LB9I@7I@7NE;I@7LB9LB9LB9LB9E<3E<3J@7J@7LC9LC9MC9OE;OE;RH=UJ?UJ?UJ?VJ?VK?YMAYMAZNAXL@XL@YL@YM@WK?WK?XL?[NA\OAZM@^PB\N@_QC]OA^PAbSDcSDaRCbRC`PA]N@^O@_P@]N>aQAeTCeTDcRBcSBn[IkYGlYGlZGmZGp]Jt`Lu`LuaLuaM|gQ}gQ}gQ}gQ}gQ}gRzeO~hR~hR~hR~hR~hR~hRqrx|}ЫװٲͨϪѫܴ罓×ț–꿔ʕǙԣդץצܩݪݫݫاک٪|ʧ˨̨ײҭẓຓ佖㾘忚լٱ̨âϳԹr}h~im|ŨںƤ޹ڷڹ´οijҷe\Srg\rg]jaWi`WOIBTMF\SJaWMf[OfZNg[Ok^Qj\Ok]Pn`Ro`RrcTsdUtdUtdUueUueUxhWvfUvfUvfVwfVwfVwfVtdTtdTwgVwgVwgVwgVudTrbRrbRo_Po_OqaQl\Mk\Mk\MhZKhZKhYKhYKgYKdVIdVHdVHdVHaSF`SF`SF`SF]PD\PC\PC\PC\OCYMAXMAUJ?UJ?RG6LC9JA7J@7J@7LC9LC9LC9LC9LC9LC9LC9LC9LC9JA7OE;OE;OE;RG=RH=UJ?UJ?SH=VJ?VK?YMAYMAZNBU
I>UJ>VJ>VJ>WK?UI=XK?YL?\OAZM@[M@\N@_QC]OAaRDbSDcTDaRC_PA]N?^O@_P@]N?aQAeTCbRBcRBdSBn\IkYGiWEiWEp]Jq^JuaMuaMvaMydO}gR}hR~hR~hR~hR~hR{eP~hR~hR~hRhRhRhRhR~hR~hRsqr{|ʦЫҭٲͨϪ֯ḏ轓זǚțʜϠРѡҢ۩۩۩ݬ֧ŞuΪЬѭײݶҮҮẓšĝƟ俚խܴЬǦϳϵŮ|uuqyħͯƪ¢Բܺݽɹʺg]Tpe[mcZbZQTMGSMFYPH^UKdYNfZNg[Ok^Ql_Qm_Rn`RoaRrcTsdUtdUwgWxgWxhWxhXyhXyhXyiXziXwfVwgVwgVudTxgVxgVxgVudTudTrbRudTrbRrbRo_Pl]Nl]MiZKhZKhZKhZKhYKhYKgYKgYKdVIaTFaTFaSF`SF]QD]PD]PD\PD\PCYMAYMAVJ?UJ?RH=PE;OE;OE:OE:LB8LB8KB8ND:SH>RH=UJ?TJ?RG=QG=QG=QG=ND;ND;ND;KB9KB8H?6H?6H?6JA8JA8JA8JA8JA8G?6LC:JA8G?6JA8LC9LC9OE;OE;LC9LC9OE;QG=TI?RG=OE;RG=RH=RH=RH=UJ?SH=SH=QFWK@TI>UI>UJ>VJ>VJ>WK?UI=XK?XL?YL@ZM@[M@\NA\OA]OAaSDbSDcTE_PA`QA^O@_O@`PAaQA^O?bRBcSBgVEkYGlZGmZHjXFn[Hr^KuaMvbMzePzeP~hR~hR~hR~hRiRiSiSiSiSiSiSiSiS|fPiR|fP{fP{fPt`LustuvȥΪկױЫҬݵڳܴ罔鿔ʖǚСեצܪا٨ڨکեԥӤ˟fq|ĢƣѭײݷӮݷ۶šɠʣǡ̦ԮײέҶʱܼɬ}ũȬϲɪٹعؼ˻ôĴʶf]Upf\rh^e\TWPINHBVNF^UKcXNk^Rl_Sn`Sl_Rm`Rn`RoaRufWvfWyiYzjYxhWxhXyhXyhXyiXziXziXziXziXxgV{iXxgVxgV{jXxgVueTueTueTrbRrbRl]Nl]Nl]NiZLiZKiZKhZKhZKhYKj\Mj\Mj[MgYKgYKfXJ`SF`SF`SF`SF]PD\PDYMAVK?VK?PF;PE;PE;OE;OE;LB8LB8QGTI>UI>UJ>UJ>VJ>VK?TI=UI=XL?YL@ZM@[M@YL?ZL?`RCaSDcTDaRC_PA`QBaRB_PA`QA^O@_P@cSBgVElZHlZHm[Hn[Hn[Ir_KvbNzePzeP{eP~iSiSiSiSiSlUiSiSiSiSiSiS|fQ|fQ|fQ|fQxcNu`Lu`Lu`Lt`Lt`Llustu|ǥҮٳֱԮ߷ṑ㺒弓罔鿕ɝϡСѢ֦۪ܫܫبקѣɞ{bmu|ɧ~ǥͪسسֲܷƞĝʣ̧̥ЫϬέֺŭġնˮǬƩӴڼԾ¹˻Ƕıɴ׷swmcpf]h_V^WPPJDSLD`WMf[Pj^Rl`SnaSl_Rm`RqcTrcUufWyiYzjYzjZ{jZ{kZ|kZ|kZziXziXziXziX{jYo]o]~l[{jY{jYueTueTueTxgVxgVrbRo`Po`Pl]Nl]Nl]Nl]Nk\Nk\Mk\Mk\Mj\MgYKgYKdVIdVIcVHcVHcUH`SF_SF_RF\PDYMAVK?VJ?UJ?UJ?UJ?RG=WL@WL@VK@VK@VK@VK@UK@RH>RH>RH=RH=RG=OE;OE;NE;NE;LB9ND;ND;ND:MD:MD:MD:MD:PFQFWK@WL@XL@XL@XL@YMATI=UI=UI=YL@WJ>ZM@YL?ZL?cTFdUFfVGaRCcSDaRB_PA`QA^O@_P@`PAkYGlZHm[Hq^Kr^Ko\Is_LwbN{fQ{fQiSiSiSjS|gQmVjTjTjTjTjTjT}gQ}gQydO}gQydOuaLuaLuaLuaLu`Lu`Lt`Lt`L{ePnomttu|ǥΪԯֱز㻓ṑ㺒ฐ潔˜̟͠ΡϡТѣ֦ޭڪէϣˡq[w_jrvw{ 
ȥҮسổÛ㽗ȡƠǢ̧ġ˩ʪٽȲֱݾ׹¨ͲīպǬ۾ڿֿʷʸȷ±ͱrh_ri_h_W^WPPJDUNF`WMf[PmaUl`SnaTobTpbTqcUrdUxiYyiYzjZzjZ{kZ|kZ|kZ|kZn]n]o]~l[o]q_q_r_o]o]yhWyhWyhWyhWxhWscRp`Po`Pl]Nl]Nl]Nl]Nl]Nk]Nk\Nk\Mk\MhYKeWIdWIdVIdVIcVH`SF`SF`SF]PDZNBWK@VK?VK?VJ?XMAXLAZNCZNCZNBYNBYNBYMBXMBSH>SH>RH>PFRH>UJ?ZNCWLAUJ?UJ?UJ?XMAXMAVK@VK@VK@TI>WK@WL@ZNBXL@XL@YMAWK?WK?XL?YL@\OB]PB[NA\OA`RDeUFfWGbSC`QBbRC`QA^O@cRB`QAhWFlZHq^Kr^Kr_Lp]JwcNxcO{fQ|gQjTjTjTjTmVmVkTkTkTkTkT}hRzeOzeOvbMydOvaMvaMvaMuaMuaMuaLuaLxcNxcN{fP{fPzePzePkijmqw{|¡ͪ˨ѭܶ㻓๑⺒߸ṑʞ˟ȝΡӤϢܬզ٪ݭУҦϤ㾙mX{cnrot{ƥǥͪ׳濙ǟʢ̥ʣ⿛⿝̨ãܾչȰ۷ԳԸδɰʳиȯʹɲȲð̻ξ˸g_Wsi`jbZ\UNMHCUNF`WMf[PmaUl`SnaTobTpbUqcUufWyiYyjZzjZ{jZ~m\n]n]n]n]q_q_q_tatar_r_r_|kY|kY|jYyhWyhWyhWscSp`Qm^Nm^Nm]Nl]Nl]Nl]Nl]Nl]Nk\Nk\NeWIeWIeWIdWIdVIaTGaTG`SFZNBZNBWL@WK@\PD\PD^RE[OC[OCZOCZNCZNCZNCYNBYNBVK@VK@VK@SH>UJ@RH>RH>RH=RG=OE;OE;OE;NE;NE;ND;ND;ND;ND;ND:SH>SH>PFRH>UJ@UJ@UJ@UJ@UK@UK@[ODXMBVK@VK@VK@YNBZNBZNB]QD[OB[OCYMAZMAZNA[NB\OBZM@[NA\OA`RDeVFfWGbSDaRB_PAaQBbRBaQAhWFp]Kq^Kr_Ls`L{fQxcOxdOydOjTjTkTnWkUnWkUkUkUkUkU~hRzePzePwbNvbMvbMvbMvbMvaMvaMuaMydOxdOxcOxcOxcOt`LwcNwbNvbNvbNvbNjhijhir{¡Ȧʧհ۵ݷ߸Ẓڳ˜Ù̠էڪ֨רששӦ΢Ϥǟ{hTq[{cgkp{Ƥƥ̩۷ÜǟɢШΧ˦áǥãܾжȱ}ٷĨԺ˷л̹ȵòaZSsj`e^V^WQOJDXPHaWMf[PmaUm`TqcVpbTqcUtfWufW|l\}l\}m\~m\n]n]n]q_q_q_r`tbtbububububp^|kY|kY|kY|kYyhWvfUscSm^Om^Om^Om^Nm]Nl]Nl]Nl]NiZLfXJeWJeWIeWIbTGaTGaTG^QE[OC`SF]QD]PD_SF_RF_RF\PD[OC[OC[OCZOCZNCZNCYNCYNBVK@SI>SI>VK@XMBXMBUJ@UJ@RH=OE;OE;OE;OE;OE;NE;NE;NE;QG=SI?SI>SI>PFSH>UK@UK@UK@VK@VK@XMB[ODYMBYMBYNB\PD\PDZNBZNB]QD^QE\OC\PC]PC]QD\OB\OBcUG_QCcTFgXHiYIhXHdTEcSCbRB`PAlZHm[Ir_Ls`LwcO{gRydPyePzePkUkUnWlUoXoXlUlUlUiSiS{fPwcNwcNwbNwbNwbNvbNvbNyePydPydOydOuaMuaMtaMt`LwcOwcNwbNvbNvbNvbNubNuaNxdPxaghijhilx|~ΫЬ۵ẓ㼔ܵ⻓Ěɞʟˠ̠է٫֨֨ҥ٫Ϥ̣şvcQu_w`gostŤŤЭڶ޺œɢ˥ɤҬ˧âä׻жĨ̴ʬ]WPulch`X[UOMHCXPIaWNi^SnaUpcVtfXreWteWwhZxiZ|l\}m\~m]~n]n]q_q_q`r`r`ububububububs`p^p^n\n\n\|kZwfUvfUscSscSpaQp`Qp`Qp`Qo`Pl]Nl]NiZLhZLhZLeWJhZKgYKdWIaTGaTG`TG`SG`SF`SF_SF\PD\PD\PD[OD[OC[OCZOCZOCZNCWLATI?VK@YNBYMBXMBXMBUK@RH>RH>RH>RH=RH=OE;OE;OE;OE;QG=QG=QG=QG=QG=ND;LB9QG=QGVK@VK@VK@VK@YMBYMBYMBYNBYNBYNB\PD]PD]QD]QD^QEaTG_RE_RE`SEaSFaTFbTFfX
IbTFgWHk[KjZJjYIfVFhWGjYHm[In\Js`MxdO|gRzePzeQ~iSlVoXlVoXoXlVlVlViS|fQ{fQxcOxcOt`Lt`LwcNwbNzePzePyePyePuaMr^Kq^Kq^KtaMwcOwcOwcOvbNvbNvbNubNuaNxdPxdPwcPwcOzfQw`xa|e~eijhilu|z{ģӯձܶݷ۵ẓ翗ȞŜʟǝ̡ԧ͢ѥԨϤժΥ۹{gTybjqywwuģĤɨٵݹЩʥȤֲͩ˩ݿۿپ~wp̰Įyymv~Ҹ`XQsjaf^WYSMOJDXQIf\Rl`UqdXreXtfYseWwhYxiZyiZ|l\}m]~m]n]q_q`q`r`r`r`ubububububvcs`q^q^n\n\n\}kZziXziXwfVvfUvfUvfUscSscSp`Qo`Qo`Ql]Nl]NiZLhZLhZLhZLeWIdWIdWIaTG`TG`SG`SG]QD]PD\PD\PD\PD[OD[OC[OCZOCWLAWLAWLAWLAYNCYNBYMBXMBSI>SH>SH>RH>PFQFRH=TJ?TJ?TJ?WLAWLAWLAYNCWLAWLAYNCYNCZNCWLAWLAZNC]QE]QE]QE]QE]QE^QE^RE_REbTGbUGcUHdVHgYJhYKiZKp`Oo^Nq`OsaPraOl[JiXGlZIo]JyePiTkTnWqZqZqZnWkU}hRydPydPydPuaMuaMxdO{fR{fQzfQzfQs`Ls_Lr_Lr_Lr_Lq^KtaNwdPwcOwcOvcOvbOvbOubOubNtaNwdPwdPzfRzfRvcOubOxeQxdQ{gSzgSzfS}iU~g{ev`t^u_zc~f|dhklsqt{ȧϬЭֲݷ㼖۶ݷٴ߹佖šΤҧҧѧЧҩҪݻt^imx}ʪЮճճճٶᾛàâɧƦ¤׻ջ˴Ϸsjbh`Yjc[tldzqwmqiame]Űi`Wskbe^WPKFWQKaXPlbWocXreXsfYreWvhZxiZyj[zj[}m]~n]q`q`r`r`r`saucvcvcvcvcvcvctatataq_q_o]o]o]~l[~lZziXziXziXziXwfVwfVvfVvfUscSp`Qo`Ql]Ol]Oi[LhZLhZLeWJeWJdWJdWIaTGaTG`TG`SG`SG_SF_SF_SF_RFYMBYMBXMBXMB[OCZOCZOCZNCWLATI?QG=QG=QG=QGUK@UK@UK@SH>UJ@XMBXLAWLAWLAZOC]QE_SG_SG]QE]QEZOCZOC`SG`SG`SG`SG`TG`TGaTG^RE_REdWIeWIhZKhZLn_Po`Pp`PqaQrbQq`PraPq`OtbPwdQq_MkYHo\JzePkUs[v]s[iSzeQzeQyePuaNxdPxdP{gR{fRt`Ms`Ms`Ms_Lr_Lr_Lq_Lq^LtaNwdPwcPvcPvcOvbOubOubOtbOtaNwdPwcPvcPvcPucPxeRxeQwdQzgSzfSvcQyfSyeR{hT{hT{gThzdxbyczdgyc~f{dhnopptx|âɧϬ޹۶ổڵ۶׳ش⼖ͣͣͤԪˣͥΧȤ|emqu£ȨɩĥӲӲܺ۹޼׷߾ݾշڽ¤ֻѸ̴~tzNMLԼyrjc[VQ^YSb\Vf`Zc]Wph`\VOskcjc[NJE]VObYPmbWrfZuh[tgYseXwiZxi[yj[zk[~n^q`q`r`rarasasavcvcvcyfyfyfwdwdwdwdtbtbtar_o]~m[~l[{jY{jY{jXwgVwgVwgVwfVtdTscTm^Om^Om^Oj[Mi[Mi[Mi[LeXJeXJeWJeWJaTHaTGaTGaTG`TG`SG`SG_SG\PDYNBYNBYMBYMBXMBXMBXMAXMAUJ?UJ?RG=RG=WLAWLAVLAVKAVK@VK@VK@VK@VK@XMBXMBXMB[OD]QF]QE]QE`TG`TG]QE]QE]QE`TG`TG`TG`TGaTGaTGaTGaTGaTGgYKgYKgYKgYKp`QpaQpaQqaQtdSrbRscRqaQsbQtcRveSygTwdQ{gSxdP{ePmV|au\|gRzfQyeQ|gStaNt`Ms`Ms`Mr`Mr_Mr_Lq_Lq^LwdPwdPvcPvcPvcPubOubOtbOq_MtaOsaNsaNr`Nr`NubPtbPtbOp_MsaOsaOvcQucPxeRtbPtbPtbPsaOsaOucQucQjyc}f~gyczd{d|ezc{djklostx{ãͫش޹۶ܸձֲ۷ܷ濙ǠˣϦǠʣȢʤͨѴnnrz~¤ǨǨѱյԴӳۺٹԶ̰ɮϴǭ̲Ȱv{qzrheawqi`a\WVSOSOLWSOYTO
qg\^XRnf_ib[[UP`YQj`WpeZsg[uh[wi\vhZwi[yj[zk[{k\q`q`rarasasavcyfyfyfyfyfzfzfwdwdwdwdwdububr`p^m[m[|jY{jYxgWxgWxgWxgVtdTtdTqaRn_Pn_Om^Oj\Mj[Mi[MfXKfXKfXJeXJbUHbUHbUHaUHaTGaTGaTG]QE]QEZNCZNCWL@WL@YNBYNBYMBYMBXMBUK@XMBZOCXLAWLAWLAWLAWLAWLAYNCYNCWLAWK@YNB^RF^RF^RF^RF^RF^RF^RF^RFaTHaTHcVIcVIaTGaTGaTGaTGfXKfXKfXKcVIfXKiZLk]Nn_Pn_Pn_Pn_PqaRvfVwfVwgVxgV{jXzhWxgUzhVygUygT~jVlW|gRkT~bt[jTubNs`Nr`Mr_Mq_Mq_LwdQwdQvcPvcPucPr`Nn]Kn\Kq_Mp^MsaOsaOr`Nr`NubPq_Nq_Mp_Mp^MsaOucQr`Oq`NtbPtbPsbPsaPraOraOr`Oq`Oq`Op_NsbPsaP|gwb{fydzexcycwaxbv`zdg|eirstruyãڷܸݹߺ״صٵ޺ߺ߻ƠʣʣͦȢʤʦţryz~{{ƧϯӳҲ˭Ӵ߿عոβ¨Ǯ~}v{rrkejeaPNKCB@HFDHEBg_Wha[qiac]VVQLbZRkaWqfZqeYvi\xj\yk]xj[yj[zk\~n^q`raraucvcvdvdyfyfzfzfzfzgzgxdxexexexexdububs`p^m\|kYyhWyhWyhWueUueUueUrbRrbRqbRn_Pk\Nk\Nj\Mj\MgYKgYKfYKcVIcVHcUHbUHbUHbUHaUH^RF^REXMAXMAXLAXLAWLAZNCZNB_SF\PD_RF\PD[PD[PDXMBXMBXMBXMA[OCZOCZOCXLAZOC]QE_SG_SG_SG_SG_SF_SF_SFbUHbUHaUHdWJdWJdWJdWIfYKi[Mh[MhZMhZLeXJgZLj\Nl^Oo`QqbStdUvfV{kZo^o^o^o^p^p^n\o]p]q^p]mZ}jWp\q[oX{_z_xdPvcPwdQsaOr`Nn]Ln]Ln\Kq_Mp_Mp^Mo^MraOo]Lq`Nq`Nq_Np_Np_No^Mo^Mr`Oq`Oq`OtbQsbPsaPraPraPraPq`Oq`Op`Op_OsbQraPraPqaPq`PscRsbQxcvbt`r^vawbxb|fs_q]ycva~g|eilrsqry|˪ѯ۸ݹղֳ۸ܸصὙşЩЩ̥˥ɥ˧ȥǦ{~ͮǩϰͯԶֹϳѵʯǭëzz|sunfoibje_ulbpiaxpha[UYSNc[Sqg\rg[rfZwi\{m_zl]{l^zk\{l\o_raucudvdvdvdyfzgzgzgzg{g{g{gxexeyeyeyeyeyevcsaq^q^}kZziXyiXvfUvfUscSscSscSrbSrbSl]Nl]Nk]Nk\NhZLhZKgYKgYKdVIdVIcVIcVIcVHcUH]PD\PD\PDYMBYMA[OC[OC^RE`TG`TGcVI`SG]QE]QE\PD\PD\PD\PD\PD\PD\PD^RF^RF^RF^RF^RE^RE`TG`TG`TG`TG`SG`SGcVIeXJeXJgZLgYLgYLi[Mh[MhZMhZLj\Nl^Pn`Qn_QpaRueV|l[n]p_p_r`r`r`r`r`s`ubucvcwcwdxdwcvbr]r]u_nXdx^|gSr`Op_Np_Nl[Ko^Mn^Mn]Mq`Op_Nm\Ll\Ll\Ll[Kn^Mq`Oq`Op`OsbQsbQraPraPqaPq`Pq`Pp`Op_Oo_Oo_OraQqaPq`Pp`PsbRueTrbQo_On_On^On^Nn[o\r_mZ}kY~lYmZq]r^vawbxbvawa{ehimpnorv}ϮǨɩέ˪Яճڷ߻߼ǢϨҫѫԮӭѬʨǦֺ̭бŨɬ̯ɭ̰ҶϴѶɰy|~rt~sxnunfwogSNIb[TlcYsh]qeYsfZxj]|n_{l^|m^zk\~o_raududvdvdwdwezgzg{g{g{g{g{h{hyeyeyeyeyeyeyeyeyetao]o]}lZwgVwfVtdTtdSscSscSscSscSp`Ql]Ol]Nl]NiZLhZLhZLeWJeWIdWIdWIdVIaTG^QE]QE`SF]QD]PD]PD_SFbUHgZLdWJdWJdWJdWIaTG`TGcVIcVIcVIcVIcVIbVIbUIeXKeXJbUHbUHeWJeWJ_SFbUHbUHaUHaTGdVIi[
Mh[MhZLgZLgYLi[MhZMj\Nl^Pn`Qn_Qm_QteVvgX~m]}m]q`sbsbsbsasasasatbtbtbwdwdxdxe{g|g}h}gxcvaw`w_lnWn]Lj[Km]Ml\Ll\Lk\Lk[Lk[Lq`Pp`Pp`Oo_OrbQraQqaQqaPq`Pp`Pp`Po_Oo_Oo_On^On^Oj[LgXJgXJl]Nl]Mk\Mk\Mk\Mj[Mj[Lo`Po`Po_Pn_PqaRs`s`ta~lZ{jX|jX}kYlZmZn[r^vawb{fyd}g~hiljnnrsyɪʫЯմִұܹܺᾛžɤͨԮϫ˧̩ͪάĥܾҷëɫǪ§§ȭɯʱ|~zϷӺּʹȮ©zpf}umng`SNJhaYrh^vk_rfZvj]{m_zl^|m^}n_scrarbudvdvdwewezg{g{h{h{h|h|h|h|hzfzfzfzfzfzfzfzfzfwdta~m[~l[{jYudTudTtdTtdTtdTtdTqaQpaQpaQp`Qj[Mj[Mi[Li[LfXJfXJbUHeWJeWJbUGbTGaTGaTGgYKi[Mi[Mi[McVIcVIcUHbUHeXJhZLeWJeWJgZLgZLgYLgYLgYLgYLj\Nj\Nj\MgYKgYKdVIdVIfYKfXKi[MhZLhZLj\Ni[Mh[MhZLi\Nk]Om_Ql^Pn`QpaSrcTwgXvgW{k[o^q`q`q`sasasasasasasasavcvcyeyezf|h{f|g}g~hi}gzdzb}bhn]Lk\Lp`Pp`Po_Po_On_OqaQqaQpaQp`Qp`Pl]NiZKeWIeWIeWIdVIgYJgXJi[LiZLk]Nk\Nk\Mj\Mj[Mj[Ml]On_Pn_Pn_Pm_PpaRueVueUteUtdUsdUr_xds`tan\o]p]q^o\o]m[n[o\vbwc{fyd}g~hljjnorv|åɪήϯطݻ⿝޼ġġġ˧ƤƤɧȧʩ£׺ո׻պеδʹɰĬªĬƭ˲Ѷҷϵٽγǭz~~ke_wohd^W\WQmd\tj_uj^ui]zm_pb{m^|m_qatdrbsbvewewewexe{h{h|h|h|h|h}h}i}izfzg{g{g{g{g{g{gxdxdububr`m[m[|jYueTueTueTueTrbRqbRqaRqaRn_Om^Om^Oj[MgYKgYKfXJfXJfXJl]Nn_Pn_Pn_Pk\Nk\NeWJdWIdWIgYKgYKj[Mj[Mi[MfYKi[Mi[Ml^Oo`Qo`Qo`Qo`Ql]Ol]Oi[Mi[Mi[Li[Li[LiZLhZLm^Ol^Ok]Nj\Ni[MhZLo`Rn_QrcTqbTpbSufVwgXvgX{k[o^o^q`q`p_p_sasasaucucwexexezgkkmomnnkjkj~g|deosbPn_Pj[MfXJfXJfXJeXJeWJeWIdWIdWIdVIcVIfXKi[LhZLhZLj\Nj\Nj\Ni[Mi[Mi[MqbSpaSpaRm^Pl^Po`Rn`Rn`QpbSpaSrdUrcUrcTteVseVsdVn\q_r_s`s`tao\~lZm[q^o\o]p]q^~lYp]q]xcva}h{fijjnruv|ˬ̭յյںۺַ׷ַۺں޾ݾܽ׹ָոոԷӷ׺׺ʯγʰʰƭéĪīɯʯϴ˱z~~xpunfYTOg`Zvmc}rf|oc|oauftepascvfudsbtcwezgzh{h~j~j~kkk}i}i}i}i~i~i{g{g{g{g{g{gyeyevcvcvcs`s`s`p^n\m\m[yhWyhWyhWxhWxgWueTrbRtdTqbRtdTwfVwfVsdTscTscSscSscSo`Qo`Ql]Ol]Ol]Ol]Ol]Nl]No`Qo`Po_Po_Po_Po`Po`Po`Po`PrbSo`Qp`QscSscSm^Nm]Ni[LiZKk\Mj[Mk]Nm^On_Po`QpaSrcTsdUrcTqbTsdUwhXyiZ~m]o_o^o^o^q`p_p_ucweweweygzg|hlppprsssrroponjkjpm^ObUHaTHaTG`TG`SG`SG_SGeXKeWJdWJdWJdWJfYLi[Nh[Mh[Mj]Oj\Ol_Ql^Pn`Rn`Rn`Rm`Rm_Qm_Ql_Ql^QqcUqbTpbTpbTobTrdUqcUteWseWseWvcvctatbr`p^p^taubr`p^n\ubvbs`taubyewcxdr_ze~hijjntx|wħͯΰΰƩ˭ӵܼܼ׹׹ۼۼҵҵͱȭȭȭ̱ȭǭǭīīɯ~~~swuoitmfrleWRMqia{pfsg}pd}obvgyjxhyh|kzivewewfzh{h{i~kkkkllllllljjj|h|h|h
wcwcwcwctatatatatavcvcvcsas`s`n\n\|kZ|kYm\|kY|jY{jYxhWxgWxgWxgWxgWtdTqbRn_PqbRqbRqbRqbRqbRqbRqbRtdTudTudTueTueTveUveUscSqaQqaQo_Oo_Oo_OraQn^Np`Pl\Mm^Nk\Ml]NueUscTyiYueVsdUrcTqbTxhY|l\~n^p_o_o^o^o^o^saucucygzg|illnppppqsssttuurqokjn|baTH_SG^RGdWKcWKcWJfYLeYLeXLg[Nj]Pj\Oi\Oi\Ok^Qn`Sm`Rm`Rm_Rl_Rl_Rl^Qk^Qk^Qj^Qm`Rl_Rl_Rl_RnaTpcUpbUrdWrdWtfXvhZvhZugZugY{gxeyevctar_o]p^p^taxeucvczf{gxeyfzf{g|h}hll|hkoopyzyzǫ˯ԶԶԶггԷϳ˯Ǭ˯ʯƬ¨~{x{yvspmwuqkdxqungYTO^YSvmd{pewjtfug{lxhyi|k}l~l|jxfxf{i|i|illlllllmmmmmkkkk{fxdxdxdububxdxdxdxdxdxdwduar_r_r_tataq_q_o\o\}lZn\n\}lZziXziXwfVtdStdStdStdStdSwfVwgVxgVxgV|jX|jYzhWziWudSvdSweTtcRyfT}jWzgUweSsbPraPq`OucRudSudStdSziXwgWzjYxhXtdUrcTyiZ}m]o^n^p_p_o_o^o^q`sbsbxezgmmnnpppqqqusuttuwxspnpmիfYK_TH_SH^SHdXLfZNfZNh\Ph\Oj^Qj]Ql`Sl_Sl_Rk_Rk^Rk^Rj^Qj^Qj]Qi]Qi]Qk_Rk^Rk^Rm`Tl`Sl`Sl_Sk_SnaTreXocVm`Tl`Tl`Sk_Sk_Sk_Sj^S}i}ixexevcvdtbtbxe{h|i}i}j~jm}i}jmkzfzg~jkllmn}i{gpvyxsz}}éȭȭĩȭĪ|yvyvvpp~n{k{m_{m_yimztnzsmmgab]Wjc\{qgthviwi~n|m{kzi}l~lm}k~kyg|i}j}jllmmmmmmmmnnnl~i|g|g|g~iyeyeyeyeyeyeye|gyeyevbyeyeyeyeydvbvbs`p^p^p^n[n[n[}kY}kY}kYziW}kZ~lZ~lZlZ|jX}kX~kYlYmZ~kX|iW~kWlXp\u_|dghjhgu`n[mZ~lZvfUxhW{jZ|k[zjY{k[q`p_q`sbsarararar`r`tbvdvdkkmoooqssuuwuvvvywxutrqotqaP`UJ_TJ_TIaVKdXMcXMcXMcXLeZNeYNeYNdYNdYMdXMcXMcXMcXMbWLg\Pg[Pf[Pf[OcXMcXMcXMeZOdYNg[Pf[Pf[Ph]Qh]Qg\Qg\Qg\Qf[Pf[Pf[Pf[PeZP}ik}i}j{hxfvcvdwdwexexfyfyg}j{h{h|i}jzh~kllm~k|ilmpqoosywtuxyzzzwzztqwtqzkzkzkvhvhvhxl_mbWrfZsgZ}xs~xquoi[VQe_Yypgujwkwj{lo}nnn~mm}k~k~l|jzh}j~jmmmmnnnnnnnnooolmmjj}h}h}h}h}hzfzf}h}hzfzfzfzfzfzfzfzfxdr_p]p]p]m[p]p]q^n[o\lZmZn[n[o\q]r]nZv`~fhlmlpppsloljs^p\p]tasaq_o^p_tbq_ratbtbsbsasaucucyfyflmooooouuuutxxvw{yzzwutosȢ|tcR`VK_UK_UKbWMaWLaWLaVLcXNcXNbXMbXM_UK_UKaWLaWLcYNcXNbXNbXMbXMbWMaWMcYOf[Pe[PeZPeZPdZOdZOdYOf[Qe[Pe[PeZPdZPdZPdZOf[Qf[Qe[Qe[Qmm|i}izgzg{hxfvdvdsbtbtcucrasbvezh{h{iygvetc{i|iygzhxfxfygm~knoovsqtorv|~xxpp~mppspps|l{lwiwisfsfwl`sh]kaWndZg]TcZQg^Th^Tzvrztotoic^Ymgaqib|riuiyl}oppnoqrn~lllm{i~kknnnnnoooqqrorrrppnkkii|g|giiiii|g|g|g}g}g}g}g}hxc{fxcuas_n[q]q^r^s^t_wbyczdyc~gkoov{Ǧղ
ڶүάά԰ձѭxsnjq^tawdubsavdwexfzgwewdyfxfzgzgzgzgnnrppprttvtttvwy{}~|}zwurtҩk]NVMDUMDXOF]SJ\SJ\SI^UKaWM`WM`VM`VL`VLbXNaXNaWNaWM`WM`WMbXObXNdZPdZPcZPcYPcYObYObYObXObXNdZPcZPe[Re[Qe[Qd[QdZQdZQf\Re\Re\Re[Re[Qtrmmknzgweweucududsbp`q`qararbsbsbtczixgxgsb}n^qarbrbscwftd~o_vevewf{i{jxgve|knwfor~ypnqxhxhue{k{ktewhwhzkvhxktgtg|pdlbXd[Rc[R_WObZRaYR]VOSMFe`[uroxsnxsmd_Ze_Zvngypgtjvjzmrwsqtrsqommnl}jlnoooorrrrrssssqqooomjjmmkkkkkkkkklljj~hxc|fydzd{e{e|f~gh~gklnsxֳڷЯܻܺ࿟ݽۼعϱܻ׵׳޷tpixdxdwdtbsa{h|i}j}i|i|i|h{h{h}j}ioqsssssuusuuyyy|||}~{wyvr޲VMDQJBQIBSKCVMEUMEUMEUMEWOFYQH[SJ[RJ[RJ[RI]TK_VM_VM^UL^UL`WN`WN_VN_VMaXOaXO`XO`WN`WN`WN_VN_VM_VM^VM`XObYPbYPd[Rd[RcZQcZQcZQbZQbYQbYPxvtrmnk|i|j|jwewfudududsbscsc}n^~n_~o_o`p`pavfwgrb~o`|m^payk\vhZsfXtfXxj[yj\vhZzk]zl]{l^sdsdtdtete{kxixiufrdrdyl^yl^|oardqdtgwiviviwk`uk`mcZh`Wg_Vf^VaZS`YR\VOXRLQLGdeexurtpkpkfkfaje_tmgvnfwmvk}p~psxvruuvrrpnnl~j~kkprrssusssttttrrmnnnnnnnnoooooopnk}g~gjhh}g~ghijkprvyӱ໗ӲֻȰ©ָ׵ԯŢspxc}hyfwd}i~jk~j~j}j}i}imlnrtussuuuuvxz||~}}}{y|{{ѩm]LSKDRKCRJCRJCQJCSLESLDSLDUNFWOHWOGYQIYQIYQIXPIZRJZRJ\TL^VN^VM]UM]UM]UM]TL\TL\TL\TL[SL[SK[SK]UM\TM\TL\TL[TL]VN]UM]UM]UM\UM\TM\TL\TL[TL[SLvxttrrpn{i|il|jyhyhwfqaqb~o_~o`{l^|m^|m^yk\yk]vh[wi[wi[wj\pauhZvh[sfXsfYtgYmaUugZuh[vh[vi\wi\wj\wj]qcwiufxi|oa}obseyl_rfZrfZrfZui]ti]wk`vk`rg]qg\mcYh_Vc[Se]Ud\U_XRZTNVPKQMHPLHKHEcdewuruqmrmhhd_mhbzsl|tl|siwlxlsvvywvvvwusssomkknnqqtttttuuussqooqroppppppqqqrrknnoopqopoput~ģ޺׵Դٻ̳|vvt~syoɬ޹رyrl}hzf}h}ijlkkj~jlmosuvvvxxxxz{}{}{|zxNG@LE?NGANGAMG@PIBOIBRKDVOHVOGUNGUNGWPIYRJYRJYQJXQJXQIXPIZRKYRKYRKYRJVOHVOHVOHXPIYRKYRKYRKYQJXQJXQJXQJWQJWPIUNGTNGVPIXQJXQJXQJWQJWPJWPJWPIVPIuxsstqrpmn~k~lxguexhxhyhvfsd|n_yk]yk]vi[sfYsfYtgZtgZqdXqeXqeYocVl`Tl`Uf[Pf[PqeYqeYuh\|ob|ob}pc}pcznath\xk_uhuhyksfsfymauj^qg\ncYj`Vpf\oe[h_Vc[Sf]UaYR_XQXRLSNHQMHMIEHEAFCA<:8_abvtruqmokfkfasmgyrk}umvmwm{ozn~puzz{{ywuvtttrpnooorrrssssttttturssqqqrrrrsssooppqrstutxzǦ༘׶ںԹư}zpynzpxn{pyo|sjvn|tlzrvn|s}{Ϯ⹑Ťtmk~iklmllkkkmnpxxwwwy{{|zʥڱNG@F@;F@;E@:E@:JD>LF@KF@NGARLERKEQKESMFSMFSLFRLFRLFRLERKETMGUOHUOHUNHWPJVPIVPIV
OIVOIUOIUOHWPJVPJVPJVPIVOIUOIUOIUOIUNHTNHTNHTNHVPIUOIUOIUOIUOITNITNHswsssqoommm}kziwgwgwgwgrcrcrcrcrd}oazm_wj]tg[th[qeYqeYncWk`Uk`UlaVi^Ti^TcYOcYPg]SncXncYndYymash\sh]sh]ti^ti^qf\uj_uj_{pe{peqg]jaWg^Uf]U_WOaYR]VOYRLWQKSMHNJELHDKGDC@>><:765XZ[omkuqnrnilgbuohxqjvnzp{pwl}q~qtvyyz|}{yyuuvvqrrrsssttttuuuuxxvttrsssttuuusrrstuvtvz|ƥŤʨߺԳ׸Ͳu~vnohaxqjvohslevngqjcungvoi|vozsyryqyqzqwײϪtrojomnnmmllnqsyy{{{{{}~~¢}乍|hTID>C>9B=9GBHC=JD?LFALFAPJDOJDOIDOIDQKEPKEPJEPJERLFTNHSMHUOIUOIUOITNITNHTNHUPJUOJUOITOITOITNITNISNHSNHSMHRMHRMHRMGRLGQLGQLGQLGSNHSMHPKFPKFPKFuwwssqnoolm|jyhyhyhyiyipbpbpb~pb~pbxk^uh\th\th\th\qfZncXncXk`Vh^TkaWkaWkaWi_UcZPcZQ`WOg]Td[RkaXkaXh_Vi_Vrh^lcZsi_tj`tj`qg^jaXg_Vd\Sc[S\UNRLFUOHNHCMHBKGBJFBC?<>;9B?=:97876LMOhfeurnsnjmhcvpi|un~vm}tkyov|p}prsuz}}}|wxvvvtuuuvtttuuuuvvyyzzzxyttuuvvyuvttuvwyz|ŤɧɧâɧٶӲɬѵ{s}v}vpvqkvpk{up{uoxrlztnysxr{tzt|unxqxp~u|svl}px羕Үuuoqopqqnnooqsww{}~~zŤĢ۳NG?C>9B=9D?;FAJE@ID@MHCQLGQLGQLFPKFPKFRMGRLGQLGQLGQLFPKFRMHRMHQLGQLGQLGNJENIENIDNIDMIDMHDMHDLHDLHCLHCLGCLGCKGCKGBKGBKFBJFBLHDLHCLHCwwuuwwppqn{j{jxhxhxhxhufrcrdrd}oa|oavi]ui]rf[rf[rf[qf[ndYndYndYqf\qg\ndZkaXh_VkbYh_WbZRf]Uc[S`XQ]VN^VO[TM_WPbZSi`XiaYld\f_Wc\Tg_W`YRWPJSMHMGBID?HD?HC?C@;8<:8764211?@@cbaqmjlhdlgauoh~vowozqvl~sx{o|otvyw|||}{{yzzxxvtuuuvvvwwzz{{|||zxyzz}ywxvz{|z|ɧ̪ͪϬ̪ϭֹͭͯʱ}z~wy~}}}|~w{t}vo{t{s{s{sxp|sk}tk}siwkz|ęֱ{uwwxutttsuwwy{{{~ģʧḎΝNE=A<8?;7A=9C?;C?:C>:B>:B>:B>9D@;C?;C?;C?;C>:B>:B>:D@;7@<9@<8@<8?<8A=:A=9A=9@=9@<9@<8D@;B>;C@GD@GC@FC@FC?FB?FB?EB?GD@GC@FC@FC@FC?FB?EB?EB?EB?EB?FC@FC@FC@FC@EB?EB?EB?EB?EB?EB?DA?DA>xtvvvtrmpmm|k|k{kvgvgsd}pbzm`ym`yl`ui]rf[qf[qf[sh^rh^oe[ne[ne\me\md\md\ld\ld\ld\ld]ld]ld]le^ib\ic\\VP\VQWQLTOJRMHLHCMHDJFANIDQLGTOJQLHNJEEB>?<8<96630530742*)(+*)654JIHYWUkgchc_jd_smf}um{q{qtt}qtqyyyyzwzz{{{ywwuvy||}}~£¢Ťģ̩Ьԯӯֲ׳̪ήŨͱ̲¬~~{x}umxowoyqyq{sk{sk{sk}um}umwogwogqibqiaqiaqiaog^me\_WNl`Up_}ȝ˟ڴͫͪĤ¢Ťʨɧְ̩ͪ鿔ϟɜaSF@:5841962;74;74:73841:63963962630852852:73<85@<9?<9?<8A>:A=:@=:@=9B>;A>;A>;A=:@=:@=:B?;A>;C@=C@=DA>DA>DA>C@>C@=C@=B@=DA>DA>CA>A?<@>;@>;@>;@=;A?<@><@><@>;@>;@=;@=;?=;?=;?
=;?=:?=:|xuvxvsqqqo~m~m}mzkxhufrd|oa{na{nawk_wk_ymauj_tj_qg]pf]mdZlcZne]ne]me]le]ld]kd]kd]kd]jd^hb[e_Ye_Ze_Zb]Xc^X^YTYTPTOKQMILHEJFBJGCEB>FB>C@<@=:;8520--*('%#!#! %#"0/.;:9GFDZWTfb]hc]jd]qibzrizpuws}p~psux}}vvy|z}}~{ywx}~ääŦĤ¢ģǦʨΫҮײֱٴڵɨӳɬ˲ɱŮ}yyxuyo{qzqxu~vm{sj}ulzrjwogvnfskdphame^jb[ib[ib[iaZc\Ub\U_YRZSMWQJTNHXQJZRKQIBSJAaTHq^tģְÙฐΫ{weT^RG<60:5151-1.+30-2/,2/,2/,41.63/52/52/52/41/63063063052052/741963853:74<96;96=:8=:7=:7<97<97>;8=;8=:8<:7<:7>;9?=:?=:?<:><:><:@=;?=;?=;?=:?<:><:><:@=;?=;?=;?=;>=;><:><:><:><:=<:=;:=;9=;9=;9<;9<;9><:><:><:=<:=<:=;:yywwuuqssssowhtercqcqc~qc{nasere~re~qewl`si^sh^uk`tj`sj`pg^of^nf^haYjc[ib\ib\hb\hb\e_Zd_Zd_Zd_Za]Xa]X\XTZVRZVRXTQNKGLIELIFEB?@=:>;8;9696441/,*(&$##" #! (&%20.975GECLJGUQN\WRe_Zle^skczqhyotttrrttw|wxx}}{ywxx{||zxvyzåŦĥƦɨǧƦŤȧ̪Эհٴٴִܷܶʭƫ̵̶~~{x|r{quk~tkvm|sjxown}tk|skyphumeog`og_haZhaZhaYg`Yg`Yg`Yf_Xc\Uc\U]VP\VPYSMYSLXRLSMGJE@GB=D?:D?:>:5;7384051.2.+41-40-62/52.-*'2/+1.+1.+.+).+(+(&/-*1/,31.30-74174174163141.630752742741641853:75974;96=:8=:7<:7>;9=;9=;8?<:><:@>;@=;?=;?=:@><@><@><><:=<:=;:?=;>=;><;><;=<:=<:?=<>=<>=;>=;><;=<;=<;=<:=<:=;:=;:<;:<;:>=;><;><;=<;=<;trtwrrppurrnvgvgseseugrere{ob{nbwk`ynbxmbxmbqg]pg]ri_qh_ph_pg_og_nf_jc]ohanhamgajd_gb]d_Zc_[a]X`]Y^ZW[XU[XU\YUWTQPMJNKIIGDJGEEC@A><<:7:85753-+*#! 
-,*0.,531753?<:IFCLIERNJXSOd^Xha[og_ypgvkwl}ps{nsvvtvwww|z{{yww}~yzuvyzåŧȨʪɩȨ˪ƥʨүҮײײ۶޹״ԴӹԾʷǵƴzyxxzyvzpyovl~ukzqhzqhypg{rizriqiamf^g`Yf_Xc\Vf_Xb\Ue^Wb[Ta[T^XQaZT^WQ]WQ]WP_YR_XR\UOSMHGC>GB=GB=C?:C?:=95:637407301.+1.+0-*0-*-*(*(%*(%/,).,).+).+(-+(-*(-*(/,*.,),)'.+)0-+/-+1/,1.,0.,20.42/41/31/31/30.420642642753753753642642864975975:97<:8<:8;:8=;9>=;><;=<:=<:=;:<;:<;:<;9;:9;:9;:9;:8:98:98:98:98988987;:9::9:99:98:98:98998998988988987987987987887voommoqrrtrrzkxiufwiwhvhsfuhuhuhthth|qf}shwmcvlculctkcskcumetlepiboibnhbnhbje`gb^d`[c_[c_\`]Y^ZW][X[XVVTQROMKIGKIGIGEGECECAA?=?=;@><><:97520.531853853@=:B?=EB?KGDNJFWRM^XRe^WiaYxoexmvjvjwkruur~nxv{{yz|}{yy}}~|}x|}ĥʪ̫ͭǧƦʩӰӯسٴ޸۶ۺͽ}yvuzovluk~tj}sjul~uk}tk|sj{rjwogwnfg`Yd]V`ZS]WQWQKWQKYSMVPKVPJXRMXRLXRLZTN`YSb[UTNHSNHKFAGC>JE@D?;A=8>:6=9584074041.1.+1.+.+(.+(-+(*(%(%#%#!'%#,)'+)&+)&-+(-*(,*(,*')'%+)')'%+)'/-*.,*.,*.,*-+)/-+/-+31/20.20.42031031/431421420542532532431421321542532654875987;:8<;:<;:;:9;:9;:9::9:99:98998998988988887887877877777776776888888887887877877777777777777776776us~l{j{jnqsqqso~oyj{lxjxjzlzlylylvj~sguiti~ti{qgzpgtkbsjbtldskdphaohangamgalfahd_hc_ea\b^Z_[X\YVWTRTRPTRPTRPRPNMLJIGFGEDEDBCB@DBAECAA?=?=;:97><9<97753:85C@=C@=A=:D@=MHDVPK\VP^WPe]Uqh_ti}rfuivi|n}oqootzxxxyyzzy||~}Я˫˪ʩӰ԰ձڵຕɣٕ~xrxsoyvr|xu~xttymuj{qgzpfulctkbsjbriawnfvmeumeng_jc\^XQ`ZT]WQWQKSNIPKFPKFRMHRMHLGBLGBQLGQLGQLFKFAHC?E@9B=9?;695162/30,-*'-*'*'%*'$'$")'$)&$)&$(&##!#!" 
%# $" &$"+)&+(&*(&(&$*(%'%#'%#&%#&$"(&$,*(.,*.,*-+*-+)/-+.,+.,+0.,/.,/-,0/-0/-20/10/32021021021/10/321211210432322321443654554987887877877777776766666666665555555555555455444666666556555555677667667666666666666666666om|k|k~myi|k~n~np}n}nxixiwiwiykykxkxkxkwkwkvkvkukypfxofwnftlcpiamf_le^ke^lf`ic^hc^gb^fb^fb^`]Y]ZW]ZWXUSWUSSQOSQOLJICA@@?>CBADBAB@?BA@CB@?>:NIDTOIUOI_XQZSLg_Vwmbsh}qexjxkvhzkxiqtywwxx||}{yz}~Ħ¤ƦέϭԱղܸ㾙ٞ~wwrlqmhkgckhekhewtrwtrvs~rzoyn}rhukbsjariaqh`le]haYd]Vc\Vb\Ud^Wd]WTOJTNIVQKPKFOKFRMHLGCID@HD@HD@B?;B>:?<8?;8B>:?;7?;7>;7>:7;84;74;742/,-*'-*'*'%'%"'$"&$"&$"#!&#!%#!(%#'%#*'%)'%&$"&$"(&$(&$*(&*(&)'%)'%+)'+)'(&$(&$,*(+*()'&+)'*)'.,+.,+-,*-+*/-,.-,0.-/.-/.-/-,.-,.-,10/10/00/0//0/.//./..221211111544444444555555555666778777777677667666566566556556556455455455445445345345556556556556456455455qnzj}lzjxhufwhwhzkwhtfvhvhvh{mxkxkwjwjvjxlxlujtjyoeulcrianf^ph`oh`ib\kd^lf`ke`je_id_d_[ea]b_[[WTZWTWURURPRPNMKJFECFECDCBBA@BA@CA@CBAA@?DCBCA@DB@ECAA?==;8>;9?<9=:7;85?;8B>:KGBRMGSMGTNGRLE_WOg]Tti_uj_wk_xl_}obyjrpusqruuy|}xy~ʩ˪ѯԱӬվy}xsytoytoplhkgcda^khe|zw}zx~~~r{owkzpevlbqh^sj`of]jbYiaYjc[f_Xe^Wd]Vc\V^YS^XR]WRYTOMHDLHDIEA@<9@<9<96<96?;8>;8<85;8595285263052085285220-2/-/-*/-*,*'*'%'%"$" $" ! #!" 
%#!$" $" $" $" &$"%#"%#!%#!'%#+)'+)'-+),*(,*(0.,/.,/-+.-+.-+0.-10.1/.0/.0/.0.-10/10/21021010010/00/21111011022222100/333322222444555667666566556556455455567566678678677567567567567567456456456456456456356345345567567567566yk^|n`yl^qcsesevg~qc{naxk_{naznaserere~rewjvjvjsg~sg}rgzoeyoesj`ri`ri`qh`me^og`qibpiblf_ic]f`[e`[d_Zc_Zc^Z_\XZWTWTRUROPMKKIGHFEDBAFDCHGEDBAB@?@?=BA@CB@DBAECACA@B@>><:<:8=;8><9:74>:7?;8@<8ID@MHBQKEOICMGA]UM_VMi_UdZPlaVmbVreYufyiorppqtu{|{yw}|{}}˫ӱҹzz~yt~yt}x{vqtokea]fc_`][gdbywtvvjuj|qg{qfpf]kbYf^Vh`X]VO_XQ^WP]VP[UOZTNVQKXRMWRMXTOUPLKGDHDAB?;641963;85;85:85520/-+2/-/-+/-*,*(/,*,*(.,*.,*)'%&%#&$"&$"$"  #!"!%#!$#!$" &$"$" $" &$"(&$'&$'%#'%#'%#&%#&$#(&$*(&+*(/.,/-,/-+.-+0.-/.-/.,.-,.-,.,+/.-32121021010010/00/0/////100222333333233444556555667788778678677567788678678678568567345345678578578568568567467467467457457457245235235235356356ֱwdRl_RrfZsg[vj^yl`{obsg\sg\vj^{ob}qe}qe|pe|pe~rg~rg}rg}rgti~si{pfwndvmdsjbme]le]ng_mf_jc]ic]ib\e`Ze_Zd_Zc^Z`\X[WSUROUROPMKMKIKIGDB@FDBFDCCBADBAB@?BA?BA@CA@CB@B@?CA?A?=@>;A>;?<9=:7<85@<8FB=JE@LF@GA;9>;885320.1/..-+.,+.,*+)(+)(+)(*)'*)'#! #! #!      "!"!"!" " $#!$"!$" $" &$"(&$(&$*(&*(&*(&)'%)'%)'%('%(&%(&$'&$+*(+)(*)(,+),*)-,+/.,/-,.-,.-,-,+/.-0/.0/.10010/00/0/////000000111111011000/00112233345456667567567457456456356346467457457356356346246467457357357357468468468468468467467467457457ܹ۸۸մڸpbTkaWsh]sh]qf[qf[sh]xmaxmbxmbxmbzodyod|qf{pf}shwmdwmcvmculcrjatlcng_nf_mf_le^d^Xc^X`[V]XSa\Xa\W^ZUVROVROQNKNKHNKHIFDDB@DB@FDBFDCDBAB@?BA?BA?CA@A?>B@>ECACA?@=;C@>?<:@=:A>;@<9A=:B>:D?;JE@LF@GAFC@=:87531/.+*(('&%$#"!!"! "! !! ! ! ! ! ! 
&%#&%#)'&)'&)'%)'%)'%)'%)'%+)')'%+)'+)'+)'+)'+)'-+)/-+/-*/,*.,*.,*.,*-+)-+)-+),*),*(.,*/.,/-,,+),+).,+-,+-,*,+*,+*+**+*).--.-,//.10000/.-----000000//0///001000/00//0011/01122346345245235235134124124235135135135124246246135135135135246246246246146146136135135135135135Ὑִִ߼Ġȣ̧j_Usi^i_VlbXqg\ti_yncyncyncxncukaukaukawmcvmcvlculcpg_riaqiakd\f_Xc\Vb\Vb\Ua[U`[Ue_Zb\W\XSWSOTPMTPMOLHLIFIGDLIFIGDGDBGDBGECDCAECACA?CA?FDBDB@EB@ECADA?B@=FC@DA>@=:A>;@<9FB>B>:D?:E@;IC>A<7B=7G@:KD=LE=PH@bXNdYNh]Qg[Oh\PugX~n_p_uczgkwdvcq^t`wbqΰʧĥε~}w~znkhmjhda_][Ya_\a_]hecliflifvroz{xuwkzodqh^mcZkbYiaXd\U\UNZTMUOJMHCHD@GC?EB>DA>C@=DA?@><<:897554221010/,+*&&%$#"! #""""!  ""!$$#$$#$$#)('+*).-,.-,.-,,+),+),+),+),+)/-+/-+/-+/-+/-+/-+/-+2/-2/-41/1/-1/-1/,.,*.,*.,*.,*-+)-+)-+),*).,*.,*20.1/.1/-0/-20/10.1/.0/.//./.-00/0/.//.10000/.-----......,,,++,,--/00//0012/01123012346124023135134124024024013/13-/1-/1-/1-/1.02.02-02025025024/24/24/24035035035035035035035025025۸༙ĠĠȣȣ˧ޯh^Ui`WmcYrh^rh^uk`uk`uk`uj`tj`rh^qh^sj`qg^ne\kbZjbZjbZg_Xd]Uf_Xc\U`YS_YS_YSa[Uc]W]XSZVQXSOUQMTPMOLHOLHJGDOLHLIFJGDGEBGECECAECAFCAFDAFDBGDBHEBHFCGDAJGDFC@LIEFB?GC?KFBD@;KFAFA=E@;FA;>94?:5D>8KD=IB;TLC\SIaWL_UJaVKcWKseWyj[teVzjZ}l[n\~m[o]vbm¦ҴЮɫҹ~x~ytsokokgnkgkhekhekhevrnzvr~}s{othoe\iaXh_W_WP`YR[TNRMGJFAB?;A=:?<9>;8<:8;977549767656543210//-,,/..,++.--++*+*****,,,***('',,+(''*))*)),,+,,+,,+/.-/.-/.-/.-21/43153153153153153131/31/31/1/-41/41/31/63163153153030.20.20.20-1/-31/1/-0.,42042031053143142132021021010/10/211110100221111111000122//0///.//,,-+,-,-../0-././1./0023023/13-/1/02024135035025024/24/14/14.14.13.13.03.03,/1-03.14.14.14.14.13/25136136136136136136߼̦ԭԭϪϪΪ}l[e\Tmd[ne[qg]tj`tj`tj`qg^oe\lcZlcZne\kbZme\jbZjbZjbZiaZd]Uf_We^Wb\U`YS_YS\WQ^YS[VQYTOXTOXSOUQMUQMRNKMJFRNKOLIKHEHEBHFCFCAIFCKHELIFGDBHEBHEBGDAJFCMJFNJGJFBHEAJEAHD?D@;KFAID?HB=FA;;72C=7A;5HA;PH@ULD]SI_TJ]SHbWKk_Rn`SpbTrdUyiY|kZwfVta~i¦äϯȫֽ­óŶx~xvqkxsnrmhupl}wr~yzm}rfsi_nd[h`Wb[S\VOWQKKFBIEAGC@B?B?=@><<:9875665322211111...000///,,-011223677666666888888888:::;::=<<;::;::987987:87865865
875875975975;97<97<:7<:7<:7<:7=:7=:7:85:85:85853853852752752964964641863753975964864:8675365342132021021010/22121011010000///////...-..//0.//-./-./,-.-/0-./,.//12/02135135024024/14035025.03.03-02-02-/2.13-03,/1-03-03-02,/2.14,/2,/2,/2.25.25.15.14.14036147147147ݻ⿛Ǣϩ˦ʦҭʦŢg[PaYQmcZpf]si_vkaqg]qg]qg]lbYkbYi`Wi`Wh`Wh`Wh`Wh_We]Ud]Ud]Uf_Wf_Wc\U^WQ`YS_YS_YS\WQ\VQ\VQ[VQVQMVQMSOKNJGUQMSOKLHELHEGDAIFCLIELIFMIFHEBIEBIFBJFCJGCNJFLHDMIENJEMHCHD?D@;KF@ID?KE?FA;<72@:5A;5LD=TKCULD^TJ_UKaVKcXLi]Pl^Qn`RqbTo^{jZtbwϲݿίڿ˴θij}v}vzszszų°|znuk`oe\lc[iaYf_X`ZTMIDKGCIEBIFCDB@BA?>=;765332221433555444888788777899:;;>??>??>>?>>>EEEEEDCBBCBBCBBCBBCCBDCBFEDFEDIGFGEDGFDJHFJHFKHFIFDGDBGDBEB@EC@FC@C@>DA>DA>FC@DA=A>;?<9?<9?<9>;8>;8<96;86=:8=:7<:7<97;96:86:86975875865764654865765655332322221333111../...--.,-.,--+,-+,-*+,+,.+,-*,-+-..02/13.03.02-02-/2,/1-03-03-02,/2,/2,/2+.1+.1+.1+.1+.1*-1,/2+/2+.2+.2,03,03,/3.14/26/25/25/25258258ཚơɥɥѬɥ̩_XPd[Soe\pf\ndZsi_nd[ne[i`Wi`Wf^Uf^UkbYkbYkbYh`We]Ue]Ue]Ue]Ug_Wd]UaZS^XQ^XQ^WQ]WQ]WQ]WQ]WQ\WQZTOYTOWRMTPKWRMVRMTPKJGCJGCKGCMIFMJFKHDLHDJFBMIEKGCKGCOJFJFBKGBQLGPKFID?D@;FA8?94=72D>7IB;TKCVMD[QG`UKbWLh\Pk^Qm`Rl_Qo^n]ucxػηîƳij{x{ŲƲDzƲ~sshxmcmd\jbZc\U`ZT\WRPLHJGDEC@@>=>=;:98;:9999888;<<===<<=;<EA>DA>DA>?<9>;9=;8=:8:86975875865764876865765433332322221111001000//0.//../-./,-.-/0-././1-/1-.0/13025024.02.14.13.03-03-02.14-04-03-03,03,/3,/2+/2+/2+.2+.2+.2+.2*.1*.1,/3,/3+/2+/2+/2+/2-04.25.25.25Ġġġ̨̨Ǥ˨ЬYRLc[Sj`Wrh^uk`pf]si_qg]ne[g^Ug^Ui`Wi`Wi`WlbYi`Wi`Wf^Uc[Sc[Sc[Se]Ue]U_XQ]VO_XQ\VO^XQ^XQ^XQ^XQ]WQ]WR[UOZUPZUP]WRXSNSNINJEKGCQLHNJFNJFLHDMHDMIEKGCLGCLHCMHDKFBLGBOJEPKFQLFE@;FA;GA8?94F@9GA:IB;QI@YPFTKB]SHcXLi]Pl_RoaSn^{k[ygqȲ{yz­йսDz­vsg{pewmcof^g`Yc]WYTOUQMFDADB@?=<=<;BBAA@@AAACCCFFGJJKKLLMMNNOONNORSSTTUXYYXXXVVVWVV[[Z^]\\[Z^]\\[Y_][_][`^[^\Ya^[_\Y_\Y_\Y[XU\XUZVSZVSVROVROTPMTPMRNJRNJSNJPLHPLHNJFPLHPLHPLHPLHMIEKGCJFCJFBEA>DA>C@=C@=B?<:>;9;97<:8;:8:98987765654544222333333222/00//0.//-./-./,-.+-.+,.-/1-.0.02025/24/14.14.13-03.14.14-14.25.15-15-14-04-04,04,04,03,/3+/3+/3*.1*-1*-1*-1*-1*-1*
-1+.2+.2+.2+.2,03,03Ÿ ƣʧʧʧWPIc[SjaWrh^pf\ndZndZoe[oe[oe[lcYlcYj`Wj`Wg^Uj`Wi`Wg^Ug^Ud\Sf^Vf^Vf^V`YQ`YQ`YQ`YQ`YRb[Tb[Ta[T_XR^XR^XR\VP\VP[VP^XRYSNVQLQMHMHCMHDRMHOKFPKFNIDNIENJEGC?JFAKFBLGBLGBPKEQKFOIDHC=FA;D?9E@:D>8E?9F@9NF?OG@NF>OG?XNE]SInaUnaTm`Rp`q`n^zΩ᭥ո~}z{yìѸзҹ׾r}qetj_ulcoh`c]W[WRTQMQNKEDBHGEKJIIHHIIIMMMNNNOOPSSSYYYZ[[\\]\\\```bbbeddgffgfeihglkiomkomkmkikigligjgdhebkgdda^ea^ea^eb^d`\b^Zd`\e`\e`\a\Wc^Z\XS]XSXSOXTOXTOXTOXTOVQLVQLVQLUQLSNJRNIOKGOKGLHDKHDHEAEB?DB?DA>EB@DB?CA?B@>?><<;9987765432322222333223222112011/01./1./0-.0,./-/1-/0.02246136036025/25.25.14-14.25.15/36.26.26.26-15-15,/3+/3+/3+/3+/3+/3*.2*.2*.2*.2*.2*.2*.2*.2*.2*.2*.2,/3,/3ɥѬ̩ȦĢVPIe]Tg^UkaWndZndZoeZoe[oe[rh]uj_uj_xmbpf[mcYmcZjaWmcZmcZjaXjaXe\Tg^Vg^Vd\Tg^Vd\TaYRc\Tc\Te^Ve^Ve^Vb[Te^Vd]Vb[T_YR]VP\VP\VPWRLUPJMIDNIDSNHPKFQLGNJEOJEMHCHC?HD?LGBLGBMHBPKEQKFUOHQJDIC>B<7E@:D>8E?8LE>NF?LD=JC;SJA_TJeYNk_RrdWvhYrbyiYzǭx{~u~{~ux|swzxyx~ˮȨȰvxncrjbha[b]X_[X^[XUSQONLONMPOONNNVVV\\\bbbaaaa``bbbihhihhmllmlkmlkrporporpnrpnspnsqntqnvspwspuqnxtpsokytptpkupklgclgcmhcmhcmhckf`lf`id^gb\e`Zc]Xa[V\VQYTOYTOYTOYTOVQLVQLVQLUPLRNIRMIQMIPLHOLHOKGNJGKGDJGDIFCECAB@>A@>@?=?>=:98776544655555444334233123/01./0-/0-.0,./-/1.02025/24036025/25.25.14-14-04,03-15.26.26-16-15-15,15,05,04,04,04+04+04+/4+/4+/3+/3+/3+/3+/3+/3+/3+/3+/3*.1*.1ΫƤիSMG`XPg^Uh_UndZodZoeZpeZsh]sh]sh]vk_vk`sh^qf\qf\qf\qf\ndZndZkbXkbXh_Vh_Vh_Ve]Te]Te\Te\Te\Tg_Vg_Vg^Vf^Vd\Tc\Tf^Wf^We^Wc\U`YSc\U`YS]WQSNHNIDNIDQLFTNIOJEMHCMHCMHCNHCID?ID?JE@KE@NHCRKERLFSMFFA;D?9E@:D>8KD=IB;MF>UMDJB;OG>[QGl`Sk_Rzk]{l]}m]}iذë{txqxq}unyq{sk|tlzrivm~tyoxmx}{vqp}}ֶ}Ⱦȼ̿ɰ}shpiae`[hd_iebeb`\[Y\[Z_^]`_^ccbgfehhgjihnmlpontsrtrqxwuvtrvtryvt{xv{xv|yu~{w}y}y{w{w|w|w|wzu}xr~xrztnsmgsmgqkeqkeoicmgamgake_ke_ke_ic\d^X_YT]WQZUOZTOZTOYTNVQLVQLUPLRNIRMIQMHPLHOKGNJGMJFLIFKHEHECGDBEDBDCACBA>=<998::9777666566455445345012/12/01./1024/24247146036035/25.25.14-14-04,03,/3+/3.26-16-15,15,05,05,04+04+04+/4+/4+/4+/3+/3+/3*/3*/3+/3+/3+/3+/3+/3+/3+/3+/3SJAbYQbYPaXNlbWmbXmcXncXncXncYndYodYodYqf[ti^ti^wl`ti^rg\rg\ti^oeZoeZlbXl
bXg]Tf]Tf]Tf]Tc[Rc[Rf]Tf]Te]Th_We]Ug_Wg_Wg_Wg_WiaYf_Wd\Ud\Uf_WaZS^XQ\UOTNHRLFRLFWQKRLGMHCKFANHCNICID?JE?JE@KE@KF@ID>MGASMFTNG?94@:5IC==>==;;::::999889778456346245356024136468358258147147.25.15/26.25-15-15-15,04.37.27-27-26-16+04+04+/4+/4+/4*/4*/3*/3*/3*/3*/3*/3*/3*/3*/3*/3*/3+/3,04,04e[Qh]Sh^SkaVncXocXlaVmbVmbWmbWpdYpeYsg\uj^xl`xl`xl`vj^uj^sh\peZpeZmcXj`Vh^Th^Te[Re[Rd[Rd[Rd[Ri`Wi`Wi`Wi`Wh`Wh`Wh`Wh`Wh_Wh_Wg_Wg_Wd]Ud]U_XQ_XQ_XQUOIUOIPJDRMGUOIPJENHCKFALFALGAJE?JE?HC=KF@ID>JD>KE?OHBGA;E?9F@:MF?UMEVNEULDSKBULCWNDreXk_RwiZtcláףͺĮϯ{{vqlpkfqkfoicf`Z`ZUhb[ng_le]sjajbYukatj_}qe}qdyj{jz˼{rzqƺ{q~voyszu~z}~{~|~{|z~{y}z|{}~~~~}}{{{{{xx{t{tyryrwowpixpixpisldsldsldsldsldqibng_kd]f_X^XR^XQ[UOZTOZTNYSNXSNRNIRMIQLHPLHNKGMJFLIFKHEJGEIFDGECFDCEDBBA@?>====<<<9::89978967856867957846869;7:<69<58;369259158148047.26/37.26.26-26-16.27/38.38.38.37-27,16,15,05+05+05+05+05+05+04+04+04+04+04*.3*.3,04,04,04,04maVpdXsfZpdXnbVnbWnbWncWncWncWth\th\th\ym`|oc|ocwk_th]th]ncXncYlaVi_Ti_Ti_Tf\RcZPcZPh^Uh^Uh^Ug^UjaWg^Uj`Wi`Wi`Wi`Wi`Wf^Uh`Wh`We]Uc[S`XQb[S`XQZTMXQKSMFPJDPJDSMGSMGQKENICLFAID?E@:E@;E@;FA;ID>GAIB>>>===::;99:89:68967968:79;7:<69<68;58;47:37:26:259159049048159059.37/48/38049/49/49-27-27-27-27,17,16,16,16,16,16,16,16,16,16,16.27.27-15-15-15-15th[th[th[wj]uh[uh[rfYocWrfZrfZui\xk^xk_{na{narf[rf[pdYodYodYmbWj_Ug]Rg]Rg]SdZPf]Sf\Si_Ui_Uh_Uf\Se\Sh^Uh^Ug^Ug^Ug^Ug^Uf^Ui`Xi`Xf^U`YQ`YQc[S]VOXQKXQKSMFPJDPJDSMGSMGSMGQKELFAGBID>JD>KE?NHARKDSLELE>JC>?=>><=>9:;9;=8:<9;=8:=7:<69<58;48;37:36:269159159048/48/38049/4905:05:06;05;05:/5:/5:/4:/4:-38-38-28-27-27-27-27,16-27-27-27-27.27.27.27.37.37qdWsfYvi\vi\vi\sgZsgZvi\vi\sgZsgZyl_yl_|nasg[sg[qeYpeYpeYnbWk`Uh]Se[Ph]Sg]Sg]Sg]Sg]Sg]Si_Uf]Sf]Sf\Sf\ScZQh_Ue\Sg^Ug^Ug^Ud\Sf^U^WOaYQ^VO[TMXQKXQKXQKSMFPJDPJDPJDSMGPJENHBIC>FAMF?XOG`VMYPF[QHTKAfZNnaS~n]l½IJѻϵ˲{wyurvrnplhqmi_[WTPL\WS]YTga[_YSme]tkb}shwlaznawg̼y׶|uij}}{xv{qyoyovm}tj}tj}sj|sjzqgypgvmeskbkc[h`Yd]V_XR^WQ]WQ\VPXSNWRMVQLRNJOKGNJFLIFKHEJGEJHFGECEDCFEDEDCAAA@@@???>>?<=>=>?<=?9;<;=?:8;>7:=69=59<58<48;37;26;26:15:059/38.37
05:05:/4905;05:05:/5:/4:/4:/49.49.49.49.49.49.39/5:/5:05:05:05:/48/48/48/4804815:reXugZxj\wj\wj\wj\wj\zl_zl_zm_th[zm_zm_wj]reYqeYqeYncWl`Ui^Si^Sf[Qh^Se[Qh]Sh]Sh]Sg]Sg]Sg]Sg]Sg]Sf]ScZQcZQcZQe\Se\Sg^Ud\Sd\S_WO^WO^VOVOHXQJXQJXQJUOHUOHRLFPJDPJDPJDMHBKE@HC>HC>FA?@=>?=?@>@A;=?:8;>7:=69=59<48<37;37;26:16:15:05904905:16;16;06;17<16<16<06;06;06;05;05;05;/5:/5:16HC>E@FA?A?@B=?A<>A:<>9;>8:=7:=69<58<48<37;37;26:16:15:05916:05:05:16;27<17<16<16<06;06;06;05;05;16<16<16<16<16<16<16<16;16;16;16;27;27;27;27;48=tfXwi[}n_papa|n_|n_|n_|n_|n`sfYnaUh\Pk_SmaUmaUmaUj_Sg\Qj^Sg\Qg\Qf\Qf\Qf[Qi^Sf[Qe[Qe[Qe[Qg]Sd[QaXOaXO^UL^UL`WO`WOZRJTMFWOHTMFVOHXQJXQJXQJRLFUNHTNHOICLGALGAID?LFAID?ID?D?;D?;GB=ID?GB=E@;E@;E@;HC>NHCOICOICSLFWPI]UNaYQ`XOe\Rh]SmbWnbVnbVbWLrbuƩǶѾͶּܿ׼}z~|z}{xxusspmifcqmizup}wqzs{}|uǻ췧˵¯ǵοɺ´}{yxxxv|qyn~tj~sizqgwndvmdne]jbZjbZiaYh`Yf_Xe^Wa[U^XR\WQ[VQ\WRXSOVRNUQNSPMQOLNLIJHGIGFGFEFEEDDDCCCEFFDEEBDEACD@BD=?A<>A9<>8;>7:=6:=69<58<48;37;27;26:16:15:16;16:05:05:16;27<17<16<16<16;06;06;17<17<17<17<16<16<16<16<16<16<16<27;27;27;27;27;37<49=xi[xi[qaqbqb}n`}n`zl]zl^nbUi]Qh]QfZNh\Qk_Sh\Qh\QeZOj_Sg\Qg\Qg\Qg\Qf\QcYOcYOf[Qe[Qe[QbXObXO_VL^UL^UL^UL[RJZRJUMFTMFOHANHAQJCVOHXQJWQJRLERKETNGQKELFAKFAKFAHC?HC?HC?EA7:>6:=59=59=48<38<37;38=38<16:05:05:16;27<27<17<16<16<27=27=17=17<17<17<17<17<17<17<27<27<27<27<27<27<37<37<37<59=rb~o`qbqb~o`{l^{l^ugYobUi]Qi]Qi]QfZOi]QfZOeZOeZOh\QeZOeZOdZOdYOdYOaWMaWM`VMcYO`VL_VL\SJYQH\SJ[SJ[SJXPHUMFRKCOHAOHANHANGAPJCRLEWPITNGOICNHCNHCKF@KF@JE@JE@GC>E@FB>D@D@AC;>A97:>6:=59=59=48<38<49=48=38<27<27<27;49>39>38>38>49?49?39>39>39>38>27<27<27<27<27<27<27<27<27<27<37<37<37<38<48<59=o`o`rbrb~o`{l^xj\reWl`Sl`Si]Qi]QcXLfZOcXLcXLbWLeZOeZOeZOdZOaWL^TJ^TJ^TJ[QHZQH]SJZQHYQHYPHYPHXPHXPHUMEOHALE?NHANGANGAMGAMGARKEOICNHBIC>KE@JE@JE@JE@IE@D@;D?;C?;C?;@=9@=9B?;B?;DA=DA=B>;GC?GC?GC?QLGYSN\VPf`Yle^ph`wnevmdxnexnc}rgymq|n~not|ƫֿؿ~~~|{}{y|ƶɶƮƾ̿ҿм˴ƱϼʹǶij~|{yvvssznti{qfukarh_qh^pg^ld[kcZjbZiaYe^Vc\V_YS^XRZUOVQLUPLSOKQNJTQMPNKMJHKIGLJHLKJKJIIIHJIIJJJIIJIJKGIJFHICEG@BE?BD=?B<>A;>A:=@9<@849=48=38<37<27
<38=49>49>5:?4:?4:?49?49?49>49>39>39>39>39>39>49>49>49>49=49=37<38<38<48<48<48<6:>|m^|m^|m^yj[|m^yj\vgYpbUobUobUl`SfZOcXLcXL`UJ]RH`UJ_UJ_UJbWL_TJ\RH[RH[QH[QHZQHZQHWNFWNFVNEVNEXPHUMERKCOHALE?LE>NGAMG@MG@LF@ID>GAHC>HC>GB=GB=ID?FB=C?;C?;EA=DA=B>:A>:A>:A=:EB>C?<@=:EA>EA>EA>IFBXSNa\Wga[ohaskdyqi{ri}sjukynxmsy}~{̰ܔŶϽθɷúҸĮįϼ˹ȶŴzwwwvt{o}rgwmbtj`si_rh_ne\md[lc[jbZiaYh`Yf_Xd^Wc]Va[V[VQWRNUQMTPLTQNSPMQNLMKILJHHGFKJIIIHJIIHHIGGHGHIHIJFHIEGIDFHACF@CE?BE>AD;?B;>A:=A7;?7;>7<@7;?6;?6:?48=38<49>49=5:?6;@5:@5:?5:?5:?4:?49?49>49>49>49>49>49>49>49>49>49>59>59>59>59>6:>6:>6:>8;?zk[|m^yj[vhYvhYvhYpbUpbUj]Qi]QcXLcXLcXLcXL`UJ]RH_UJ\RH\RHYOFYOF[QH[QH[QHZQHZQHTLCTKCSKCSKCSKCRKCOHAOHALE>KE>KE>MG@MF@LF@GA9A<7E@;B>9B=9FB=C?;EA=B>:B>:D@;B@=B?AD=AD<@C5:>59>59>59>59>59>5:>5:>5:>6:>6:>6:>6:>7:>8ME>OH@NG@KE>KD>KD>JD>GA9A<7@<7=95B=9A=9>:6C?:B>:B>:A=:A=:>;7?<9?<9><9>;9=;8=:8A><@><@>;MJGVRO^[W_[Whc_kfaqkerlevohxphyqi{sj~tkvkwl|pu{Ͳҷչٺ蜚Ŷìºþ˱ƮԾϺ̷ɵƳı¯ñ¯|yv~q~q}pwkth{peuk`tj`pf]ne\jbYiaXh`Xc\Ud]Ve^Xd]Wb\V^XSZUPXTOVROWSPUROVSPRPMNLKMKJKJIJIHJJIIIIHHHHIIEFGDEFCDECEGBDFACE@BE?BD>AD=@C>AE=AD<@D;?C;?C:>B:>B9=B:>C9>C9>B:?D:?C9>C9>C76:>6:>6:>6:>6:>6:>6:>7:>7;>7;>7;>8;>qcUvhYseWpbUg[Ng[NgZNdXLaUJ`UJ]RH]RH]RH_UJ\RH\RHVMCYOEXOEXOEULCRIARIAQIANF>NF>PHAPH@ME>LE>ICKD>HB;GA;D?9D>9C>9>95=94=94:62<84>:6@<8?;8?;7<85;85=:7<97<96=;8=:8<:8;97=;9?=;@>AD=@D<@DB;?D;?C<@E;@DC9>C9>B89NF>MF>MF>JC9A<7>94=94=94<84730952;74=95?;7<85;85:85<96;96;86:86;97975:86<:8=;:IGEQNLYVT[XUa^[da]ea]lhcpjesmhunhyrjxqizrj|sjwm{p|qtx}|~ʱī׺ѵɞ~|}õŶμ̴ʿв˰©־һ˶űï®ï}|y{{zutylui}rfyncxmbsi_of\pg^iaYh`Xf^Wd]Vc\Uc]Wd^Xb\W`[V\XS_[V]YU[XTSQNROMPNLOMKKJIJIHHHGIIIHHHFGGGHHFGHEFGDEGDFHCEGDFHADF@CF?BE?BE>AD=AD=@C=AE=AD<@D<@D=AE<@E<@D=AFOF>NF>QI@NF>MF>ME>ME>JC94>94;72;62=8473073/63/851:73963963852:74;96;86:85753643864;98:97><;IGFQOMVTR[XV_\Zd`]da]fb^mhduojvpjxqkzrkyqjyqh{rivl{qvyx{{~ªͳдڽ͵׾~ξʹѽöƪԼ̵̶ʴį~{xxwwvxuq{n~rf|qf{pesi_of\pg^kcZld\h`Yf_Xd^WgaZe_Ya\V]XS^YU\XTZVSXURWTQUSPPNLNMKMLJONMLKKKJJGHHFGGGGHFGHEFGDEFFHIEGIDFHCEHBEGADG@CF>AD?BF>BE>AE=AD>BF>BE=AE=AE>BF>BF=AF=AE
=AE=AEA;>A;>A;>Am_RdXKcWK]RGZPEZOEZOEZOE\QG\QG[QG[QGULB[QGRI@RI@OF>NF>NF>NF>KC=BE?CF?BF@CG?CG?CF@DH@DH@CG?CG?CG?CG?CG?BF?BF>BF>BF=AD=AD=ADA;>A<>A<>AKC;KC;KC;H@9E>7D>7?93>83>82>82=82=82:50:5095062.94084051-51-0,)/,)40-30-30-2/,630630752742:85753653642532431765:98?>=LKJQPOWUTYWU][Y^[Yda_eb_gc_lgcqlgsmgtnhvohxpizrjyqh}tkzp}ruzy}~}êɯѴ¢׽ɻ~{~{ϾðDZÿz~ѽڽѷϵ˲˳«|{zzvurtsrq{m|oxkthuiyndwmcof]kbZiaYjbZhaYha[d^Xb\Wc]Xc^Ya]X_[W^ZVZWSXUSWTRWUSXVTVUSUTRSRRNNMMMMOOPNOOKLMJKLFGHFHIFGIEFHDFHEGIDFICFHBEHCFIDGJDGJCFICFIADG@DG@CGAEHADHADH@DG@DG@CG@CG@CG@CF@CF@CF@CF@CF@CF@CF>AD?AD?BD?BD>@C>@C>@C?AC`TI_TI_TI\QF\QF[QFXNDXNDXNDWNDTKBTKBQH@QH@PH@KC;JC;G@9D>7A;5>82>82>82>82=82=72:5072.72.62.4/+3/+51-0-)-*'/,)/,)1.+.+(0-*2/,41.530742641:75753642532431321876=<;BA@IHGRQPWVU[ZX\ZX^\Za^\da^ea^jfbokfqlfrmgtngxqiwphyqi}tkxnyoutyx||ªǭոݾ˪|~z{~ŶӾԺɸ|~zy|v|u{u{Ьɮ}{zvuqp}o~p}o{nym~rgshwmcukbpg^kc[jbZh`Ykd\ib[ke_e_Ze`[c_Z_[W^ZV\YU\YVYVTYWUXVTVUSUTRUTSTSSUTTRQQOOOMNNLMNMNOLMNKLNJLMHIKGIKHJLEHJFIKFHKEHJEGJDGJDFICFICFIBEIDGJBEHBEHADHADGADGADGADGADGADGADGADFADFADFADFADF@BE@BE@BE@BEACEACEbVJaVJaVJaVJ^SH]SHWNDWMDTKBTKBNF=NE=PH?PG?JB;G@9D=7A;4A;4>82>82=82=72=72:50:5072-62-4/+3/+3/+3/+-*'-*'-*'/,(,)&.+(.+(2/,1.,52042/641531642642753643754876::9A@@LKJTSRVUTXWU\[Ya_\ca^c`]fb_ea]lgcojeqkerlftmfxpiwoh{sjxnwm}ruyzy{~é˰ǥھƷz}x~yȷҾ«zz{wxur~}z~yt{vp{v|wr{vq{v{|}zrpnrpoonmmmlnnnmmmvtr§txzxvrqpzlxkvjuhuivk}rgzpfsj`qh_og^me]kd\ng`le_jd^jd_d_Zb]Yb^Za]Y]ZV]ZW\YVXVTYWUWVTVUSVVUUUTTTSSSSRRRMMNNNONOPNOPMNOMOPMNPLNPKMOIKMHJMHJLGILGILFIKFHKEHKEHJEGJDGJDGJDGIBEHBEHBEGBEGBDGBDGBDGBDGBDGBDGBDGBDGBDGACEACEACEBCEaUJ`UJ`UJ`UJ_UJ\RHYOEVMCVMCPH?ME=ME=G@8G@8D=6D=6A:4@:4=82=72=72=72:5/94/94/62-62-3/+3/+0,)0,)-*'-*&,)&.+(,)&+)&-+(/,*1.,30-42/631531864764653543654543<;:BBAKJIRQPWVUWVUYXVb_]b`]c`^da^gd`mhdlgbojdqkerletmfxph{skvmwnwm|qvyz{zêʯܾãֲȱ~x~yzòм«Ը԰{unwrmuqnurovsp}{x}}{zuxsowsnytp}yu}y{xvtqqomtrqonmjiimmmiiijkklllppqrrryxw~uyvrp{m~p|ozmsgth}rgxndvlctkbriauldskcpible_jd^ke_ic^id_c^Zc_[b^Zb_[`]Z]ZX[YWXVTWUTWVUVUTUTSTSSRRRUUUTTTQRRNOPOPQNPQOQR
NPQNOQKMOKMOJLNHJLHJLGILGIKFIKFHKGJLGILGILGIKFIKFIKFHKEGIDGIDGICEGCEGCEGCEGCEGCEGDEGDEGBDECDE`UJ_TJ_TI_TI^TI^TIULCME=MELKJQPORQPTSRXWUYWUa^\b_\c`\d`]ie`nidojepketmgunhunfvog|tkwmxnyo|q~rwxx{˰ͱâȥٴӿ~z}~°˷ҺķͿĵ̺ï¸ιvus^]]uy}zqfa]`]Z_][][Ydcarqowvtvusywv|zx}{}}~z}~y~y~yzu|x{w}y~zzwsrommkiigegedjihihgmlknmmhggmlkrqpmkjrqosqowtqxso~ukscvsq{m|mwjzm{nylti}shvk~tj~tk|siuldskcunfohaqkdoicmhblfaje`hd_da]a]Za^[`]Z^\Z][Y^\Z[YWYXWXWVWVVXWWYXXXXWWWWVVVSTTSSTRSSPPQOPQNOPNOPMNPMNOKLNJLMHJKHJKIKMIKLIKLIJLIJLIJLHJKHJKHJKHJKHJKHJKGHIGHIGHIGHIGHIcXM]SI\RHWNDVMDSKBPH@KC7B<5B<5B;5?93<61>93;61;6183/83/83/83.73.0,(/,(/,(-)&,)&,)&)'$)&$)&$+(&+(&*(%/,).,)0-+2/-752742642753431542654543998@?>JIHPPORQPTSQVUSXWU]ZXa^\b_\c`\fb^ie`nidojdqketngvogyqj{rj~vmyoxnyn|qtvwzɮαعĢ߷ȯ}ƲDZĹķŶxpidefrv{w~]YV[XVWUT[ZX\[Zllktssssrsrqsrqsrpzxvzxv~{y||y{}y}x}x{|x|w{v}xzvrrokkhelifjhekigljhmkjnmkpnmlkjkjinlkkihigeqolpmjtplqlhrlf|oziwrp}nxjviylwkxlshvkujxmvlvm~ul{skwogyrjwpismfqkemhckgbjeajfbgc_c`\da^b`]a_\`^[`^]_]\^][[ZYZYXYXXXXWWWWVVVTTTSSTRSSRRSPPQOPPOOPNOPNOPMNOKLMKLMKLMJLMJKLJKLJKLJKLJKLJKLJKLIJKJJKJJKJJKJJKJJKHII_TJ^TJYOFXOFSJBPH?ME=G@9D>7D=7A;5A;5>83;61;61=82=8283.73.73.73.40,/,(/+(/+(,)&,)&,)&'$")&$)&#+(%*(%*(%.,).+)0-+/-+752742642531653542653543998@?>HGFKJIRQPVTSVTSVTS[XV]ZXb_[c_\fb^id`lgboidpjdsmfungxqi|skvnvlyo|q}rzosv{}~©˰ϲշȦЩϰɪŹѸ̤~ea]`]Ymljeghqvzw~|yphTQNUSRRQPRRQVVV^^^hhhmmmmmmmmlmmlppoqpoxvtyvt|zw}z~zyur|w|w|w}xt{vytpxsosoklhemjgifcdb_hfdigemkiomkmljmkijhfljhgeceb`ec`lhekgbje`oicxogufrsoo|mzlxjxkvjzmzn~shvk|qzovl}tkxphyqiyqjyrkunhsmgojdojelgblhckgbgc`d`]b_]c`^b_]b`^ca_b`__^\^]\[ZYZZYYYXYXXXXWVUUSSSSSSRRSQRROPPOOPOOPNOOOPQQQRMNOKLMKLMKLLKLLKKLKKLKKLKKLKKLKKLKKKKKKKKK]SJZQGZQGTLCOG?G@9D=7F?9F?9C=6C=6@:4;50:50=72<72:5095072.62.40,/+(/+(.+(,)&,)%+(%'$!&$!(&#*(%*(%,*',)'0-+0-+/-+530742642531653542653543876<;:FEDKJIRQPUTSVTRVTRYVTYWT^[Xb_[ea]hd_kfalgaoidqjdtmfwphyqi|tkwmxn{p|q}rrux|}~Ǭ˯вݼЪԱñӸwtrjgenlkffgilosx}~d`[MKISQPRRQSSSQQRUVW_`adefijkijkjjjmmmmmmqpputsywv}{y{yv|yvwtquqnyuqyupytpuqmrnjkgcgc`c_\da^hebjgdkhfi
gemkijhffdb`^]hfcjhejheec`fc`b^[fb]kf`mgatldrh^uexrp~n~o|mwjzm{nylwk|pzo{p{qwn}sjxpgtldtmewohungsmfuoitnhrmhpkgmhdkgchdafc`eb_fc`eb`ca_b`^a`^_]\^\[]\[ZYXYYXYXWVVUVUUUUUUTTVVVTTTSSSQQQQQQPQQRRRPPPNNONNNNNNNNNMNNLLLLLLLLLLLLLLLLLLYPGTKCQIANF?IA:C=6E?8E?8E>8B<6?:4<72<72>94<72;6294084062-3/+.+'.+',(%+(%+(%+(%&$!&$!(&#*(%*'%,)',)'0-+/-*/-*1.,742420531753642532643876=<;GFDMLKOMLVTSVTRVTRWTRYVTZWT\YVea]hc_id_lfamgapjcqkdwohxph{sjvmwmxn}r|q}qty{|}~Ǭʮϱ̸̦ܵ©Ѿ}zxmkjgggijkx{y~}tlSPNQONSSRSSSQQRTUVSTURST_`babdfhifgiijkmmnmnnqqqrrqtrqusqvtrrolmjgkgdkhdlhdiealhdhd`gd`a]Z_\Yda^c`]jgdkhegdbkifeb`\ZXa^\ca^c`^`^[c`]b^[`\Xd_[ic^ngatldof]|oc}kyopp~n~oyk|ozm{nym|ps}r{pyoulxogtldrjcwohyrkuohsmgtnhrmgnieqlgrmhlhdkgcifchebgdafcaca^b`^a_]^][]\Z][Z\[ZYXWWVUVVUXWVWWVWVVVVUVUUUUURQQSSSSSRQQQQPPPPPOONONNONNNNNNNNMLLMLLPH@PH@PH@JC8D>8D>8>94<71;61>83=83=8384/83/73/51-0-).*'+(%+(%+(%(&#(&#&#!*'%*'%,)',)'.+)0-*/-*/-*1.,52042/631531642532643754=<;EDCLJIOMLPOMXVTXVTXVT[XUZWT\YU]YVgc^hd_kfamgangaqjctmewohyph~ulvmwmzp}rsuxz{~~ƫɭΰܻ⿜ιyz~Ƿ}zsqpiii~rjcXURYWUXWVTTTTUUTUVXY[TUWUWYY\^acefhjkmokmolmolnomnonoopponnmpnmigea_]_][`^[a^[b^[_\Xb^[da]a]Z`]Y_\Yd`]eb_da^ec`a_\fcab`]\ZXa^\[YWc`^a^[^[X_\X`\Xd_[ic]ng`mf^ne]oe[~oaotqop}n{l|m|n|ozn{n~q|p|q{pyoulxof{rjvngtmfungwqjtmgrlfuoiuojqlgpkfqlhmielhdiebhdagdaec`db_da_ca^`^\]\Z][Z\[YZXWYXWXWVXWVWWVWVUWVUVVUVUUWWVTSSRQQRQQQQPQQPPONPONOONOONOG@LE>LE>KD=F@9F@9C=7C=7@;5=83;61=83<83<83<7395173/41-2.+-*'+(%*(%*'%(%#(%#%#!*'%,)',)&+)&-+(/-*/-*/,*1.,20.20-41/531642642653764><;BA?GEDONLQOMTRPXVTYVTYVS\XU\YU_[Wb]Yfa]kf`lfamganharjcumexph|sjvlwmzo{o~rtyyz{|}ƫɭͰۺཚϸv}sv~||yu~swuzv}~|z{x{w{vvpj~wo{tltmgkfab^[][X\[YVUTVVVVVWYZ[Z[]Z]_\^`]`badgadfacfhknhkmikmjlmklmijkhhhiiibbaa`_][Y[YW\ZW]ZW`]Z^[Wa^Za]Z`]Y`\Y\YUa]Z]ZW^[X]ZX\YW^[YZXVWURa^\^[Yfc`^[X\YU_\X`\Xd_Zke`jd]skcmd\pg]vj^tdqsqqo}m{l~o~p|n}o}p~q|p|q{pyn|qvl{rivnfumeunfslezslwpiuoisnhtnirmhqlgnienjfifbheagdafc`eb_db_ca^a^\`^\_][_][^\[]\Z[ZXYWVXWVXWVXVUWVUWVUXWVXWVVUTSRQRQPRQPRQPRQPNF?MF?MF?JC=GA;E?9B<7?:5=83?:5<73<72;72;72;7284062.1.*
,)&,)&*'$*'$'%"'%")'$)'$+)&+)&+)&/-*-*(/,*1.,1.,20.20-2/-531530642753864=;9?>IC8>94>94>94=94;72;62:62:6284052.51.,)&,)&,)&'%"'%"'%")'$+)&+(&+(&+(&/,*-*(/,*1.,0.,20-20-2/-31/530420753753986><;FECFDBNLJROMROMYVTZWSZWS]YU]YUb]Yd_[e`[lf`mg`ngamf_rkcumewnfzqhvlvlynzoswxyy|x}~~}ŪǫʭѲۺ߼Ʊ}v~uzqvxxzpwnfnf^qiaqiatld~ul{q~tyowm{pvlzoynwlwk}pr̽xz}~|{yxvxvsomjlighfcjhfeca][Z]]\^^]YYYWWXWXY]_a^`b]_a^acbehadgcficficehcehcfhace]_`^_`]]^[[\]\\^]\ZYXXWUZXVXUSYVSVSQTQNTQNSPMSPMUROWTQ\YV[XUZWTYVTSPNWUR_\YYVT[XVXVS^[X^[W\XU_[W^YUd_Zic\kd]sjbpg^pf\wl`vg}kwmrsnopsq~pzl{myl|o|p}q}r|qxn{riyqhzqixphvogwphungrketnhuoitnhrmgsnhpkfmhcjeaieaifbgc_fb_eb^da^c`^a^[^\Y^[Y][Y_\Z^\Z^[Y][Y][Y[YWZXWZXVXVUXVTXVTPIBMF@JD>JD>IC>D?:@;6=84=84<84:62:6295273073041..+(-+(+(&)&$'$"+(&+(&+(&*(&*(&/,*.,*.,*0.+0.+0.+20-20-2/-31/31/520420753864?=;GECFDBKIGRPMRPMTQNZWSZWS]YU]YU^ZUc^Ye`[hb\ke^ng`oharjcskcvnewof{qhvlyn|psswxyz|}|{|~ƪǫʭѲۺὙ麲xnwo~um}ulzqwnvn|tkjc\jc\g`Yjc\phavnfzqi}tk~ul{rjypg}tk{rivlujxnd~shwk|j~}|zxwusqvsqigeligkhfca_^][_^]ZYY[[[YYZ_`a]^`abdbdfcehgilfikhkmgjmbeg`be`bd\]_\^_[\]YZ[Z[[[[[ZZY[ZYWVTVTRWURURPSPMSPMMKHSPMRPMUROTQNVSPURPWTQVSQRPNROMYVT[YV[XUUSP[XU]ZW^ZWd`\b^Yc^Yjd^le_qibne]qg^qg\|obvg}jworrnorp~o|mzl{m}p|o|pzo{pyozpvm|sjzqiyphwogunfvogtmfqjdvoitnhsmgrlfpkfojelhcie`hd`ieaheafb^ea^da^b_[_\Y_\Y^[Y^[X][X^\Y^\Y^[Y][Y][X[YWYWUNHAIC=IC=KE?HC=C>9>:5>:5<73;7395195163/63/62//,)-*'-*'(&$,*'*(%*(%,*'*(%*(%.,).,)0.+0.+0.+20-2/-2/-2/-31/31.31.420631975><9C@>GECHFDQOLSPMSPMURN[WS[WS^YU^ZUc^Yd^Yf`[ic]le_oharjcskctlcwnexof~tjyn}r~rsuxxyxy{~|}|ũ˭ҳݻܹátzrjtmevngunfrkcib[f_Y]WQ]WQc\Vc\Vf_Xle]ume{sj{sk{skvnfzqi{ri|sjtlcvmdri`yodth~|{vtrqolqolkhfjhfigeeca]\Z^]\\\[ZZZXYY_``bdefhigikfhjgilfhkhjmgjlegi`bd]_aYZ\Y[\XYZXYZZZZ[[ZYYX[ZYVUTRPOSQOQOMROMRPMRPMROLROLTQNTQNSPMROMTQOMKIMJHQOMVTQXVSXUSRPM]ZW]ZW]ZVc_[a]Ye`[lf`mg`le^qiakcZqg^rg\|obtdmyorrpqr~n~o~o|n}o~p|o|p{n{o~rzo~tj}si{riyphwogvnftmeunfsmfrkeunhtmgrlfojdpkemhclgbkfbhd_gc_hd`fb^c`\b_\`]Y_\Y_\Y^[X^[X][X]ZX]ZW^[Y^[XOIBJD?JD>GBFA:6<84<8496252.51.2/,0-+.+)0-+.+)-+)+)'+)'
+)'+)'+)'/-+/-+1/,1/-/-+31.1/-1/-1/-31.31.1/,1.,0.,641753><9GDBHFCHEBOLIROLTQMVROVRN[VR]XTa\Wd^Ye_Yga[jc]kd]nf_qiatkculcvmdwnd}rhyn~rr~qsttuvxy||ħˬ׵侘Ҭ~r|}umsmfpjdlf`lf`]XSQMHQMHKGCNJEYTOa\Va\Vc^Yic^tngqkelf`oicga[jd^hb\hb\ic]mg`ohaog`sjbují~||yvzwttqnnkhgdaca^hfcgebfdajhfhgefedddceeefffjkkklmrstprsoqrlmokmnijkfghabc\]^\]^Z[[[[\\\\]]\^]\][ZYWUZXVUSQYVTWTQTQOTQNQNLTQNSPMPMJLJGKIGKHFOMKNLJMKIRPNYWUYVTYVTVSQXVS`]Zc`\d`\ea\id_mgake_mf_le]ld\kbZndZqf[qdsc}jyqtrusqpsqp}oyk|n}o}p~qznvkuj{qgwndvmdxpgwnfumetldskdslerkdqjcpicpjdmgblgamhblgbkfakfahc_ea\d`\fa]ea]a]Y`]Y`\Y_\X_[XJE?GB=C>:B>9@<8@<7=96;7485285241.0-*1/,31./-*-+(-+(-+(-+(+)&-*(-*(/,*/,*1.,1.,1/,31.1/,1/,1/,31.1/,1/,1.,0.,20.631974B@=GDAIFCJGDOLISOLUQMWSOWSOZUP`[Ub]We_Yha[hb[kd]le]og_rjasjavmcyoezpf~shxl|p}qssruuvvwz}~}z}~ŨƨӲǞzqmgarlfhb]gb\XTOJFCJFBGC@LHDNJGQMI[WR`\Wc^Ypkfmhcgc^e`\e`\hc^ea\fa\fa\gb]hc]oicpjdpibyqi{qvŹ~|xyvswsptqmqmjgdafc`eb`jgeifdmkhkigihfmlkqpouttsssrrrpqqoppqqrpqqopqlmnijkdefeeebcccccaaa__^`_^a`_`^][ZXZXUXUSVSQVTQTQNWTQTQNSPMSPMLJGLIGMKIMJHLJHNLJOMKQOMSQO][XXUSWUSZWU]ZWea^c_[fb^je`kf`oichb\mf_og_qh`ne\qf\qf[{na~o`zgwttywuorpqp}o|mzlq}pymxlvk{qf|qgzpfvmctlcwofvmetldskdtleskdqjcnhaoibnhbmgalf`ke`lgaid_hc_hc^ea\d`\fa]c_[a]Y`\YFB=D?;A=9?;7?;7<95<95<8574130.30.30-30-1.,,*(,*(,*(.,*0.,.,*0.,.,*.,*0.,0.,0.,20.0.,1.,1.,30.1.,1.,1.,1.,0.,42/853974HEBHDAIFBNKGPLHTPLVQMXSOXSO[UPc]We_Yf`Yib[jb[jc[og_ph_sjari`ukbxnd{qfthxl{n~qsrruuxyw|}~}~ǩ̬ѯ֨umgapkffa\b]Y[WSHEBHEAJGDJFCLIFNKHSPLURN\YUd`\c`\fb^c_\^ZWc_\`]Zc_\c`\c`\d`\ea]c_[fb^jealgatmgyrkzrw˕~{|yu}yuwsozvrwsosplsoktqmvspxtqyvswurusq|zx~~}{zyyyy{{zwwwtttssssssmnnmmmfffdccddcedccbadbaeca`^]\ZXZXU[XVYVSYVSWTQWSPVSPVSPROMOLJNKIMKHLJHNLJOMLNMKNLJRPOYWUYWUYWU[YW[YVa^[d`]b^[ea]id_mgblf`hb[le^nf_ph_pg]pf\sh]zm`sdsaryvytuqtpopq|mzl}o}pzmvjwk|qfzoezpfyoeulcskbvneumdtlcrkcsldpiaohale_jd^mgamf`lf`ic^jd_id_hc^hc^e`\d`[fa]C?:A=8@<8>:7>:6;85;8596342/42/42/20-0.+.,),*(0-+0-+0-+.,*0-+.,*.,*0.,0.,20-42/20.0.,0.,20.0.,0.,0.,0.,0.,42/42/631A>;IEBJFCIFBQMIQMISNJUPLWRMWRM\VP`ZTd^Wg`Y
haYiaZkc\nf]of^ri`ukbvlb{qf}qftiyl|osrrsuvxyz~}~}~}|ŧȩѯ빦ï~oicqlgiea]YVYVRKIFFCAJHEB@>GDBKIFKIFOMKTRO][Xb_]a_\a_\^\Z[YW[YW^[Y^[Y^\Y^\Y^\Z_\Z_]Z`]Zc`]iebrni{vqxrmxrzsy}~βޑ{}xt|wszvs|xuzwt{x}z~{y~~~~}|{zwvurqputsnmkomkmkinligdbb`]c`]\YV\YV\YVZVSYVSYVRUROOLJNLIPMKLJHKIGJHGLJIPNMONLQPNXVUXVTWVT\ZXZXV_]Zb_\`]Zc`\d`\ea\id_hb]ga[kd^skcrjarh_tj`ti^{nbtfxfr{vytustropp~o|n{mylxkvj~rg|qfzpeyndwmculctkbsjaskbtlcskcrjbpiaohale^kd^jd]ke_jd^ic^hc]jd_ic^hc^gb]@<8?<8=:6=96:74:74:7485241/31/31//-+/-+-+)/-+1/-1/-/-+/-+/-+-+)/-+/-+31/31/5310.+0.,20-0.,0.,0.,0.,0.,0.,42/631630C@=IEBJGCNJFQMIQMISOJWRMXSMZUO\WQa[Tjb[haZiaZld\ld\og^ri`sj`vlbwmbzod~rguizm{m}prrsuxxy|~~}~}zĦ£ӯͱĬɵ}ysmrnijfb[XUTROQOLDB@HFDECAGECDBAFDCLKISQPWVT^\[[YXZYXZYX\[ZWVUTSSTSRTSRTSRVUTYXWYXWYXW\[Yfdbigejhepmjvrowsovqmzup{u|v}vyz{~~Ṭ}|zx}{x~{xvtquropmjkheifbjfcb^[`\X`\X]YU\YUYURUROTQNSPNOMKKIGJIGGEDMLJONLLKINLKTSRYWVVUSXWU][Z][Y`^[a^[a^[b_[fb^gb^kf`gb\f`ZhaZle]qi`vmcvlaymbymaxiudpytytssrrqo~n~o}nykwjuivj~sgzod{peyodwmcxndtkbsjari`rjaskcnf^qianf_me^ic\hb\ha[kd^jd^ic]hb]gb\?;7>;7<96>:7;85;8574252030.30.20.0.,.,+0.,0.,0.,0.,.,+.-+.-+.-+/-+31/31/5311/-/-+1/-0-+0.+0.,0.,0.,0.,42/641631>;8FB>GC@KGCNJFRNIRMITOJXSN[UO]WQ]WQb[TiaZkc[ld\of^pg^ukaulbvlbwmbzod|pe}qetgviyl~ppsuvxyy|~}|ĦƛŲĬеκ}|vqsojjgca^[USPROMGECFECHFEBA@DCBHGFJIHNMLVUUZYYYYXVVVZZZZZZWWWTUUPPPPPPRRRRRRRRRTTTXXX[[[bbabbacbadcbdcbgfdmkinkiqnlrolspluqmxto|wr}xrys~xxyyzywx~~~|{w{vrvqmtoknjfie`fb^fb]`\X\YU[XTZWTTQNRPMNLJMKILJIHGFJIGNMLKJIJIHPPOUTS^]\[ZY[ZYYXV\ZXa_]][X^[Xa^[jfahd_je`ic^jd^le^nf_ph_ukbwmcwla}qdsfzj}hytytusqrsr~nozlviugvh~rfsgzodyncyodxncvlbukbsjari`qh`og_nf^kc\le]kd]jc\g`Zf`Ye_Yhb\ga[=:7=:7=:7;85:8564142042042020.20.0.,0.,0.,0.,10.0.,.,*0.,.,*.,+20.20.42020.1/-1/-/-+/-+0-+0.+0.+0.+42/42/641631@=9FB>HD@KGCOJFSNISNIUPJ[UO^WQ^XQ^XQc\Uh`XjbZof^pg^si`vlbwlbxmb{pd|pesg{pd}pdwj|n}nqquvxy||~}|{~{|¤̸߲չֺŲwrmtplkheda_XVTUSQOMLGEDHGFJIHDDCFEELKKMMMUUUTUUVVWSTTVWXVWXUVWUVWRTUPQRPQRPQRTUVVWXXYZZ[\Z[\[[\[\\\\\\\\___`__``_edchgfihfjhfnkiqnkroksoltplzvq{vq}wr|vpys}u~v}t~uvu~
sz}x~xs{vpysnvqkpkfmhcgb^c_Zb^Za]Y_\XSPNROMNLJJHFHGFLKJIHGJIILKJMMLONNUUT\[[WWVZYXZYXZYWZYW[YW^\YZXUb_\fb^gc_id_hb]ic]kd^le^qiayof{qf{pethyk|m~juwywuqqpsqpoxjtfse{ob|pcxlaxmbwlawmbxncwlbukbrh_pg^md\kc[d]Ug`XhaYib[haZg`Zf`Ye_Y>;8>;8:75:7597453153131/1/-31/531/-,/-,/-,1/-1/--,*/-,-,*-,*10.3204204204200.-/-+/-+/-+/-+0-+0.+20-42/641641?;8@=9FB>FB>JEAPKFTNITNIVPKZTN^XQ_XQaZSd\UiaXme\pg^si`tj`uk`xmbyncznc{oc|pd}pd~qdvh}n~ooprtwxz{|~}|yx|}~~~Ŧyѭӽγ׸̰{upkwsonkheb`^\ZXVUVUTPPOKJJLLKEDDFFFLLLMMNRSTQRSQRSTUWSUVRTVTVXSUWSUWPSUNPRTVXTVXUXZVXZXZ\XZ\XZ\XZ\[\^[]^\]^\]^]^^^^^^__aaabbaedcfedgedhfdkifligoliqmirnjuqlvrmxrmysn}wpxq{t|t~u{r|s|qx}ξ|y{t~xqxrlrlfnicje`fb]b^Z`]Y\YVURPSQORPNPOMLKJKJIGGFIHHJJJIIIMMMTTSXXXZZYUUUSSRVUTVUTYXVZXVXVT^[Y_\Yb_[d_[e`\fa\hb\ic]kd]oh`tkc|rhxmrqrqp|ywuqorsqrovgtfse|obznaxl`vk`wlauk`vlauj`si`pf]qg^kbZg_Wd]Uc\ThaYg`YiaZhaZ;96;86;869645315304200/-4204200/-.-+.-+0/-0/-.-+.-+/-,1/-1/-31/31/31032020..,*.,+/-+/-+/-+/-+2/-41/631631952?;8A=9GB>FB>JFAPKFTOITOIWQK]VP_XRb[Sb[Sc[SjaYld[qh^tj`uk`vkayncxlayma|pd}pd~qdrewiyk|m}n~nquuz{{|}zy|{||yޯԾַڸг|~yt{wsmjgca__]\VUTWVUSSRPPOMMMGGHMMNJJKKLMRSUOPRPRTQSUPSURTVQSVPSVRTWSVYVZ]TW[RUXRUXTWZTWZTWZVY\VY\WY\WY\XZ\XZ\YZ\[]^\]^]]^]^_^^_aaabaaedcfeddcbedbfdbgechfclheokhplhtokupkvqkxrl|uo{tmwpyqxozp|rsz}x}u{tmtngqjdid^hc^d_Zb^Y]YV[XUTQOUSQSQPQPOKJIIIHHHHGGGKKKLLMLLLPPPOOPVVVVVVQQQTSSRQQWVUWVUXWUYWU\ZX`]ZZWT]ZVfb]je`ic^mg`le^piarjbtkb~tistwwl|wwuqorpqp}lxitf~qc|obxk_xl`wk_wlavk`tj_uk`qg^nd[lcZi`Xh`Wd]Uc\Te]Vh`Y:85:858644204204204203203200.-.,+0.-0.-0.-10/.,+.-+0.-0/-20/21/31/53131/.,*.,*.,*.,*/-+/-+1/-2/-630630630?;8?;7A=9GC>GB>KFAOJDUOIUOIXQK\UN`YRc[Tc[Td\Ti`Wpf]pg]si_vkauj_xmaymaznb}pd~qdrerewizk{l~nossvxy||{zyz|}}~v{ɰ̯α}}zvrnlidb``^]\[ZXWW[[ZQQQMNNLMMMNOLMNKLNPRSQSURTVQSVPSUORUNQUOSVSVZTX[UY]UY]UY]TX\QTXQTXSVZSVZSVZSVZTWZTWZTWZWY\WZ\XZ\YZ\Y[\Z[][\]\\]\]]___``_cbbdcbecbdb`dcaecafdagebmifnjfokgsnitojvpjzsmyrk}vnwoypxo{p}r}³yyqzskslerkdpjdke`fa\d`[a^Z_\YXUSVTRTRQRQPNMMJJJIIIEFFKLLMMNJJKMNOKLMMNOOPPOPPOPPRRRUTTUUTTSRWVT\[Y[YWUSPYVS\Y
U]ZVfa]je_ic]mf`og`umevlwm{p~ru{szywwsoppo~n}lxitf~qc|obxk_vj^wk_uj_ti^rh]qf\rg]lbYh_Wi`Xh`We]Uf^V97575332031010.310310310/.,/.,/.,/.,1/.1/.-,+-,+/.-/.-10/20/20/42121//-,-,*.,*.,*.,*/,*1/,1/,630630630852?;7A=9C?;EAKFAPJETNHVPIVPI]VOaZRd\Td\Te\TlcYoe[qg]rh]uj_vk_ymbznb{nb|obreresexi{lzj}mostvwyz{~~wyxy||~uqۭǭֶ~tpmnkh_][a`_]\[WVV\\\TTTLLMQRSPQROPQQSUTVXSUXRUWQTWPSVQTXNRUQUYPTXQVZQUZPUYRV[RV[UZ^SX\PTXPTXPTXPTXQTXSVZSWZTWZTWZUXZUXZVX[WY[WY[XZ[YZ[ZZ[Z[[[[\^^^_^^ba`baacbaba_ecafdbeb`fc`khemienjeqmhsnhwqkxrlxqj|um~vnxo|rtx̴{|sjwogvnfqjcle_jd^gb]e`\b^[`\Y][XXVUQPOONNMMMNNNMMMIJJHIJIJKJLMJKLKMNIJLIJKMNOMNOIJKNNOPQQQQQRQQRRQUTS[YXWVTTRPUSPYUR_[Wb^Yd_Ze`Zke_ohavnf}tk~ukxnyosv{s|wwsoo~l|jm}lvgtfsezm`xk_th\rg[sh\qg\rh]odZkaXlbYh_ViaXh`W75353131/43143121/21/21/.-,0/..-,0/.0/.-,*/-,0/.1/.1/.10.3203204200/-/-+-+*-,*.,*.,*0.,1.,1/,530630630?;7?;7A=9C?;FALGANICUNHWPJYRK^VObZRcZRe]TjaXi_VkbXpf\qf\ti^wk`xl`{ob|ob}pcresetfyjzjzk{kqttwwxy{|{xwxz}}~ouؼ˩ݹytqmjkhehfda_^_^][ZZ[[[WWXVVWPQROPQOQSRTVUWZSVYRUXQTXPTWOSWRVZQUZPUYOTYOTXPUZRW\QV[QV[QV[QV[OTYLPUNRWNRWOSWOSWPSWPSWRUYSVYSVYTWYUWYUWYVXYWXZWYZXYZYZZZZZZZ[]]]^]]a`_a`_ba`cb`db`ec`da_eb_jgclhdokfplgrmgvpjxqkwpi|tlxozq|rzoxw{qzqhypgvnfqibnhake`hc_`\X`]Y][XZYWVTSQPPOOOMNNLLMJKLIJKFGIEFHHJLGIKGIKGIKGHJGHJKLNKMNIKLLMNOOPOPPNNNOONTSSZXWTRQSQOTROURO]ZVa]Yb^Yfa[ga\ib\tmexphyqhvmyozpttkztwso}k{i|jmzjvgtfqczm`vi]th\rg[sh\rg\kaWlbXkaXlbYh_V64242142132120/10/10/.-,0.-.-,0.-0.-.-,.-,0/.0/.0/.0/.2103103100.-.,+.-+-+*-+*-,*0.,0.,1.,520530630<95?;7?;7A=9D?;FAJE@OICQKESMF\UM_WO_WOaYQd[Si`Vj`VlbXmcXrg\sh\vj^ym`|ob}pc~pcqctfufufzj{k|kpruwxxyzyz|wvxyz|~|pf׮ȡ޸㹝~vyttplpmjjhf^][a_^a``ZZZYYYYZ[UVXRSUPRTQSUSVYVY\SVYRUYPTXQUZRV[QVZPUZNSXQV[PU[PUZPUZOUZOTZOTYQV[QV[QV[LQUMQUMQUNQUNRUNRVORVOSVPSVQSVQTVTVXTVXUWXVWXVWYWXYXXYXYYYYY\\[\\\]]\^]\a`^b`_ca_ca_db_ca^iebjfckgcnjepkftniuoiwqj~wo~vnxozp}rv}zpgwnexogng`hb\e`[b^Y_\XZWTWUSWVTRQQPPONNNJJKJKMKLNIKMFHJGILDGIDFICFHGILDGJDGIDGIDGIIKMGIKJKMJLMMNONOOMMMPPOQPPTSRNMLRPNSQNVTQWTQ]YVc_Zd_[f`[nhboibsldwogxog~ul~ulxnzpzwwr}j}k{i
yhzizjtdrc|n`zm_vi]th\rg[ncXk`Vi_Uh^Ti_V53232032010/10/10//.-/.-10//.-/.--,+/.-/.-/.-0.-0/-21/21/210310/.,.,+.,+,+)-+)/-+0.,0.,20.520520852>;7?;7?;7B=9D?;FAKE@OICRKETMFYQJ]UM^VM`XOe\Sj`WjaWkaWncYodYrf[ui]xk_}pc~qcqcrdrdsevgyi|knpquvvyzxwxwruxy{{{sfʧҳ̦ۺ~ƕy{|vwrnrolmjhgfd`_^``_a``XYY[[\WXYSUVTVWPRUSUXSVYTW[TX\SW[TX]QUZPUZQV[RW\QW\OTZOTYNTYNSYOUZOUZOUZPUZPUZPUZOSXLPTLPTLPTMQTMQTNQUNQUORUORUPRUQSUQSUSUWTVWUVXUWXVWXWWXWXXXXXYYY[[[ZZY_^]`^]`_]a_^b`^ca^da_ifcheaifbmidnjermgtnhxqk|unwovn}tkyow}ǧe^W^XS[VQ^ZU^ZV[XUXVTVTRSRQNNNNOOLMNJLMFHJEGIFHJDGICFIBEHDGJCFIADGBEIBEH@CFBEHBEHEGJGILILNJLNIJLNOPKKLKLLNOOMMMQPOMLKNMKPNLSPNVSP\XUb^Yc^Zkf`oicrleslewohwphxph{sj~ul{sjxoxywpl}k{irawgxgqb{m_zl^xj]vi\rfZnbWj_Tk`Ui_U2102102100/.0/..--.--0/..--..-..-/.-0//00//.-10/10/10/20/21/0/./.,-,+.,+,+)/-+/-+/-+20-20-52/741<85>;7?;7?;7B=9D?;GAKF@NHARLEWPHYRJ\TL\TLaXPd[Qi_UkaWlbWodYpdYpeYsg[vj]zm`qc~pbpbsdtewgzi}lnpssvwuvuxuqrquvz|vjg޻ᾜv|xxwuָuy~xquplsolrolljha_^^]]cbb^____`UVWUVXSUWPRTRUWTX[SVZUY]VZ^UY^TX]QV[RW\QV\PV[PU[OUZOTZOTZNTZNTYNTYNTYOTYOTYOTYNRWNSWKOSKOSLPTLPTMPTMQTNQTNQTOQTORTPRTQSTSUVTUWTUWUVWUVWVWWWWWWWWXXXXXX[ZZ^]\^]\_^\`^]a_]b`]c`^hebifbjfckgcmhdqkftoivpjyqj}umwo|sjxmv{ʹVRNVROSPNPNLVTSPPOSSRNOONOPJKMHJLFIKCEH?BE>AD=AD<@CBE>AE@CG>AE>AE@DGCFIILOHJMGIKIKMLNOIJKLMNMNNNNNPONONMMLKOMKRPMURP[WT^ZWfb]qkfrlfsmftmgtngwpizskxqi{skxpiwphw{ppl{hq`o_p`sc~o`{m_yl^xj]sgZmaVk`Uj_T2102100/.0/.0/.0/.0/..-,.-,.-,.-,0/.0/..--0/.0//10/10/10/0/-.-,-,*-,*.,+.,+.-+/-+1/-2/-42/52/;85<85>:7?;7?;7B=9D?;E@;ID>LF@LF@QJCUNGZRJ]TL]UL]ULd[Rg]Tj`Uk`VkaVncXocXpdXpdXtgZ{n`pbqcrcuewgzj}lnqsrsuvuurqrsrquxrmya٭Ыxvxtpywu~|x{ttnitplpmjmjhjhfa`_a``aaa]]][\\]^`Y[]UXZVX[TWZX\_W[^UZ^TY]SX\PUZSX]PV[PUZQV\PV\PV[PU[NSYNSYNSXNSXNSXNSXNSXNSXOSXMRVLPTKOSKOSLOSLPSMPSMPSNPSNQSOQSOQSPRTPRTSTVSTVTUVTUVUUVUVVVVVUUUWWWZYYZZY]\[^\[^]\_]\`^\a_\b_]gdaheaieblhdlgcrlgsnhuoiwpi|tlvn{riyouzɰGFEHGFEEEFFFDEEDEGEFHEGICFIBEH@DG?CF@DH;?C:>B9>B;?C8=A:>C;@D:>B;@D:>B:>B<@D=@D=@DEHLIMPJMPILNHJLIKMJLMMNOPQQPPPONNPONOMLRPOZWU[XU`]Zd`\gc^mhcnhcoicoicpjdunhuohs
lfunhungwpiu|pmzgudq`o_p`zk]{l^{m_wi\uh[qdXocW10//../../../../../..--,--,--,//.//..-,0/.0/.0/.0/.10//.-/.-.-,-+*/-,-,*.,+.,+0/-1/-1/-41/:74;85=:6>:7?;7?;7B=9D@;E@;JD>LF@LF@QJCVNGXQI[SK^UL^ULcZPh^Ti^TkaVlaVodXpdXqdXqeYreYuh[xk]~parcufxh}l|kmoprstturqonknrsoiw`sứ|~uwrntqnyurËt}u}wqsniqmjpmjkifdbaa`_]]\YYY_``]^_[]^WY[Z\_X[^X[_WZ^UY]TX]SW\TX]SX]RW\OUZPV[PU[OU[OUZOTZOTZMRXMRXMRXMRWMRWNRWNSWNSWMQVMQVLPTKNRKORLORLORMPRMPRNPSNPSOQSOQSPQSRSURTUSTUSTUTUUTUUUUVTTTVVVXXXYXXZYX\[Z]\Z]\[^][_][`^[a^\fc`gd`jfckgcojfpkfrmgtnhvoi{slyqi|sj}rty̲89:;<>>@B>AC?BD=ADB:=A:?C;@D:?D86;@9>D9>D9>C7C;?C=AECGKGKOLOSILOLOQKMPLNPKMNOOPNNOOOONNMSRRTSRXVT[YV^\Yb^[ea^hd`gb^qlgrlgrlgsmguoiqkeqkesmfunhvpjzqymmzgucq`o_o`wiZxj[vhZtgZreY/.-/.-/.-/.-..-..--,,-,,/.-/../..--,/..//.//.0/.0/...-/.-/.-.-+.-,.-,-,*/.,0.,0.,1/-31/:74:74=96=:6>:6?;7?;7B=9B>9E@;HBMF@RKDVOGWOG\SK^UMaXNbXOdZPg]Rj_TmbVnbVqeXqeYrfYsfYtgYyk]zl^~o`rbwf|j|kmppqorqpnonkjmqnycv`|c§{sljgcwso~yt־}ov|votojuqlokhjgdigedcbdcb`__^^^^^_\]^^_a\^`X[]WZ]WZ^UY]VZ^UY]UZ_TY^SY^QV[OTYNSYNSXOTZOTZNTYNTYMRWMRWMRWMRWMRWMRWNRWNRWMQUMQUNQULPSKNRKORLORLORMORMPRNPRNPROPROQRQSTRSTRSTSTTSTUSTURSSSSSUUUUUUXWWXXW[ZY[ZY\[Z][Z]\Z^\Z_][`][eb_fc_iebmhdpkgpjeqlfsmgxqjzrkvnf~uk~sv{ٺ69;:>A7;>8<@:?D9>C8=B6;@5:?49?39>4:@7=C7=C7=C8>D8>D7D9>D9>D9?D8=B7;@8<@=BF@DHFJNKNRJMPKNQJMOLMOKLNNOPOPPNNNSSSSRQXWUYWVZXVb_\eb_fb_iealhcokfpkfqkfqkfoidqkfsnhsmhrmgrlgsnirok|iwep`q`yj[tfXugYsfX0//0//..-.--.--..-,,+..-..-..-0//..-/.-/../..//..-,.-,/.-/.--,+.-+.-,/-,/-,0.,0.,20.964974:74=96=:6>:6?;7?;7B=9B>9GBOIBPIBRKDWOGZQI_VMbXObXOcYOf[Qh^Si^SncWqeYrfYsfYtgYtgZxj\{m^|m^}n_wgzi}kppqml~k}ilopl~ijlizelXu^׬῝ic]ifckgdxsnҹq}szr{uotojplhmjgjhegfdgfeedc^^^^__^_`\^_\^`[]_Y\^WZ]X[_VZ^W[_VZ^UY^TX]SX]RW\PUZNSXMSXMRWNTYNSYNSYNSXMRVMRVMRVMRVMRVNRVNRVMQTMQUMQULPSMPSKNQLNQLOQMOQMOQMOQNPRNPROPRQRTQRTRSTRSTRSTQRRQRRRRRTTTUTTUUUWWVXWWZYY[ZY[ZY\[Y][Y^\Z^\Za_\da^eb_jgclhcokfqlgqkeslfwpiunfume}tk|qu{7539@39@7>E7>E7>E7>E6=C7=C5;A7=C:?E9>C8:6>:6?;7B=8E?:GBF7?F6=D7=D6B;@E@DIBGKGKOJMQILOLORORTRTVTUWSTUVWWXXXWVVXWV]\[^][
da_eb_hebgc`gd`hd`fb^kgblgbnidnidnidkgbmidmhclhckgcpoj|htb{k[xiY--,--,--,--,/..---------//.---.--.--..-..-..--,,--,//.0/.0/.-,+/.-0.-.-+/-,1/-753853863963<85<96=96>:6>:6A=8D?:E?:GBPIBQIBSLDXPH[RI`WMcYOdYOdZOeZPh]Ri]Rl`TobVocVseXsfXtfYxi[xj[~o_tcyh|j}jygzhyfzg{gyf}i|h}h|g~huas_ucR~jWq[ن{qYXW[YXb_\vqm{ųз`WMwn{sluoiojeplgkgdjgdjgegedecb``_`````a^_`\^_Z\^Y[]WZ\VY\VZ]UY\VZ]UY]TX\SW\RW[RV[QV[NRWMRWMRVMRVMQVNSXOSWMQVMQVMQUNRUNRUNRUNRUMPTMQTNQTNQTMORMPRLNPLNPLNPMOPMOQMOQNOQNOQNPQOPQOPQOPQPPQPPQPQQQQQSSSSSSSSSVUUVVUWVUYXWYXWZYXZYX[YX\ZX\ZX][Yb_]eb_heaiebmhdniepjeojdoicrkdtmeyqhzp{pw|3;C19@-5<08@.6=.5=-5=/6>08@3;D19B19A29A3;C4;C4;C4:6@<8A<8D?:D?:GBLE>QIBTLDVNFYQH\SJaWMbXNeZPe[Pf[Pg[Pj^RmaTpcVqdWrdWreWxj[{l]|m^p`yhveygzgxfygxeyfzfyewdtazfr_|jXvdS{hUp[ΥˮYYYXXWa`^tqn{vƴƮݘ|d]Uxphunhsmhpkfqmhpmimjgkhfdbaa`_aa`aaa_``]^_[]^Z[]XZ\WY\WZ]VY\UX\TW[TX\TX\SW[RW[RV[QVZNRVNRVMRVMRVMQVOSWOSWMQUNQUNQUNRUNRUNRUORUMPSNPSNQSNQSMORMORMPRLNPLNPMNPMOPMOPNOPLMOLNONOPOPPOPPOPPPPQPPQPPQRRRSSSSSSSSSUUUVUUXWWXXWYXWYXWZYW[YW[YX\ZX_\Zca^da^gd`kgclhcnidojeoidmgaqjctleyqhzo}ry08A/8A/8@-6>.7@.7@.7?08A08A3;D3E8>ELE>OH@RJBTLDZQHZQHbXNbXNcXNcYNdYNeZNh\Pi]QpcVqdWreWseWvhYwhZugX{l\qatcucvdvdwexeyfvctbvbp]{jXudSn^Nl\K|iVm\\]\]_eeemljvro|ij¬жîRKEoh`rkdpjdojdie`plgolhqnkjgeca`cb`bba```^__\]^[\]Y[]XZ\XZ]WY\UY\VY]TWZSVZTW[SW[RVZRVZRVZPTXNRVNRVNRUNQUOSWOSWNQUNQUNRUNRUORUORUMPSNPSNQSNQSNQSOQSMOQNPQNPQMNPMNPMNPLMNLMNLMNLMNNOPOOPOOPOPPOPPPPPPPPRRRRRRRRRSRRUTTUUTUUTXWVXWVXWVYXVZXWZXW[YW\YW`^[c`]fc_gc`jfblgcmhcojdke_ohbqjctld~ukxmty0:D/8A09C09C1;D3;7?;7@<7A<8C?:D?:E?:HBLE>MF>OH@RJBWOFZQH_VLbXNcYNdYNdYNeZOfZOf[Og[Om`Sn`SnaTobTrdVqcUteWzk[~n]q`o^tcucvds`tar`~l[wfUn^NjZKgXHo^MvcQ߳~[^a_abqqqvts|xu~uMID]WPib\mg`qkenhcmhdjfbpmikhedb``^]ba``__^^^^^^Z[\XZ[WXZWY[VXZUWZTVYVY\UX\SVYTW[SWZSVZRVZRVYPTWNRUNRUNRUNRUPSWNRUNRUNRUORUORUORUORUNPSNPSNQSOQSOQSOQSNOQNPQNPQMNOKLNKMNLMNLMNLMNNOPNOPNOPOOPOOPOOPOOPOPPQQQRRQRRQRRRTTSTTSUTSWVUWVUXWVXWVYWVYXVZXVZXV_]Zb_\c`]eb_ieajfbkgbmhcke`je_ohbslevnfwmxmtw7@H09C09C1;D2G4>G4>G5>G8AK7@I8@I7?G9AI:BI:7>
;7?;7@<7@<8C>9D?:E?:HBMG@NG@OG@OH@RJBSKBVMD[RH`VLcYNdYNeZNeZOfZOg[Og[Ok^Qk^RoaTj]Pn`SqcUrcUxiY{l\o^o^n]o]~m\n]o]ziXpaQdVG`RDeVGhYIraO{adh_bdklmrrrtsq|xu~zzz~ztvqlfc`FDAHC?e^WgaZic]ke_mgbnjenjeolhhecda_a`^_^]_^^]]]]]^ZZ[XYZXZ[WY[XZ\VY[UXZUWZTWYSVYRVYRUXQUXQTXQTWORUMPSORUORUORUPSVORUORUORTORTORTPRTPRTNPSNQSOQSOQSOQSOQSNOQLNOMNOMNOKLMLLMLMMLMMLMMNOONOONOONOOOOOOOOOOOOOOQQQQQQQQQRQQTSSTSSTTSVVUWVUWVUWVUXWUXWUYWVYXV\ZX_\Za_\da^eb^hdajeaie`id_je`jd_ohaqjbvnfwmxmtz6@J6@J4>H39FA;GAMF@NG@OG@OH@PH@SKBTKCVME^TJ_UJbWLbWMcXMdXMbWK`UJaUJdXLg[Nh[Oi\Ol_QpaSvfWyiZzjZ~m]ziY{jZm\}l[n_Pj[LcUGaSF`RDcTFq`O~kW߳}_cgacfjklllkusrxusxuq}yzvrzvzvsrolromgfdYXWGFFFEC>;8QLGe_Xe_Yhb\gb\id_kfbokglieec`ca^a_]a_^]\[]\\[[[YZZXYZXY[YZ\XY[WY[VXZUWZVX[UXZSVXRUXRUXRUWQTWPRUNQSNPSNPSNPSNPSORTORTPRTPRTPRTPRTOQROQROQROQROQRNOQNOQNOQMNOMNOMNOMNOLLMLLMLMMLMMNNONNONNONOONOOOOOOOOQQQQQQQQQQQQQQQQQQSSRTSRVUTVUTVUTWVTWVUWVUXVUXWU[YW][Y`^[a^[da^gc`hd`gc_gb^id^je_jd^qjctld~tkyo{ov|7AJ9BL7AJ9BL8AJ7?H:CK9AJ;CK=EM?GOAIQBJQAHP@GNAHNBHNEKPFKQGLQFKOGKOJNRMPTNQTORTRTWUWYXZ[YZ[XYZXXXYYX\[[[ZY^][][Y\ZX^\Z_]Zb_\c`]c`]d`]b_[`]Y_[W_[Wc_[c_[fa]ea]gc_ea\gb^,,-,--,-----++++++++++++,,+---,,,,,,-,,.../..//.100321210432320321421431531753863964;85:74;74;84<85=95=95B=9E@;F@;FA;GAMF@NG@NG@OH@PH@PH@SKBTKCWNE_TJ_UK`UKaVKaVK`UJ`UJaVJdXLeYLfYMk^QoaSpaSvgXyiZufVvfWziY{jZvfVj\McUHaTF`RE\OB^PCdUGcTEjbfjdgkfhjooppoorqptrpusp|zwxvtqonihhbbbZ[\KLMFGHEEEDDC986B?:6?;7@;7B>9E@;F@;GA;IC=JD=MF?NG@OG@OH@PH@QI@QIARIASJASJAVMCYOEZPF[PF[QF^SH_THbVJeYMfZMgZMj]On`Rn`RoaSk]OaUH`SF^QE\PC]PD\OBZNA\OBaSEn^NmZ޺filkmolmniijllllllpppkllhjkace`beSVYMPSFILBEHACE@BD:;<666;:9<:7KGDWRMZTNc\Vc]Wc]Xgb]id_id`d`]b_\da^a_]_^\`^]ZYXYXXYYY]]^Z[[YZ[XYZWXYWXYVWYUWXVXZVXYWY[WY[SUWRSUQSUQSUPQSPQSPQRPQRPQRPQRPQRPQRNOPMNOMNOMNNMNNMNNOOPOOPOPPOPPMNNNNNNNNNNNNNNLLLLLLLLLNNNNNNNNNNNNNNNNNNNNNPPOPPOPPOPPOPPOPPORRQRRQSRQTTSUTSUTSUTSUTSVTSVUSWUSYWUZXV\ZX]ZX_]Z`]Zc`\da]d`\ea\gb]ic^ic]oharjb|siwmwk~pzPV]SZaPW]QW]QW]SY_RW]RW]SX]SX]TY]W[_Y]aZ^a[^a\_a^ad^`b`bdacdbcdabcdeeggghhgnmlmkjpnlqomrom
spmrnkpmiqmjrnjqlhqmhpkfpkfsniojepjerlgpjenhclf`nhbuoismfrlf,,,**+**+**+*++,,,,,,,,,+++------//.100110210432432433321421432532754642642753753853963963:74<96=:6>:6?:6?;7B=9C>9F@;FA;GA;JD=MF?NF?NG@OG@PH@PH@QI@OG?RJASJATJATKBSI@VLBVLBYODZOE_TI`TIfYMl^Ql_Qh[NfYLgZM^QEYMBZNB^QEYMA]PDXL@WK?[OBj[LhYJnZɡ}{yhjlklmooommmooonnnklmfgibeg]`cRUYMRVJNSFJNBFJAEH=@B578567887976@>;KGCVQLVQK]WQ`ZT`ZUid^id_hd_d`\a^[ca^a_]_]\_^]\[ZZZY]\\\[[\\][\\XYZXXYWXYVWXVWXWXYWXYXY[Y[\XYZTUWTUVTUVRSTPQRPQRPQRPQROPPOOPOOPOPPMNNMNNMNNMNNMNNNNNNNNNNNOPPNNNNNNNNNNNNNNNNNNLLLLLLNNNNNNNNNNNNNNNNNMNNMPOOPOOPOOPOOPPOPPORQQRQQRQQTSRTSRTSRUTRUTSUTSVTSVTSVUSWUSYWU\YW]ZW_\Yb_\c`\c_[d`[ea\gb]ic]mg`phauld|sivkxlu^bg^cg\ae]ae[_c^ae^ae]`c]`c^`c_ac_ac`bcabccdfdefeefjjjiihjihmlkljiqompmkqnlrolrolvrnvrn|wszvqytoxrmvqkwqkuoivpjvpjwpjyslunhunhxqjuohuohzslungwpi*********,,,,,,,,,,,,+++,,,.../..000100322322332432432321321431865532642642864753853863963<85<95=96>:6>:6?;6B=8B>9E@;F@;GA;JC=JD=MF?NG?OG@OH@PH@NF>OG?RIASJASJARI@RI@SI@VLBVLBYOD]RF]RG`UI_SGbVJcVJaUH\QE[OCYMBZNBXL@YMAZNAXL@bTGaSFo_PgXJp\ÜopqsssqqqsssoopjkliklcfhaeiTX\OTYGLQHMSDINAEJ@DH:=@9;=567566877;98B@=LHDSNIVPK\VQ_YT_ZTfa[hc^hc_c_\a^Zc`]a_\_][_^\`^]\[[[ZZ\[[]\\\\\[[[XYYWXYWXXVWXXYYWXYZ[\Z[\XYZVWXUUVTUVSSTSSTSSTQRROPPOPPOPPOPPNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNLLLNNNNNNNNNNNNLLLLLLNNMNNMNNMNNMNNMNNMNNMPOOPOOPOOPOOPOOPOORQPRQPPOORQPRQPRQPRQPSRPSRPSRQTRQVTSVTSYWUYWU\YW^\Ya^[b_[a^Zc_[d`[ea\gb\ga[mg`piawof}sjujylthjlfhjfhjefhefhghjfghfghggheefffffffgffhgflkjmljpnlsqotqourospmvsouqmxtpytpwsnxsnytn~xsysys~xq|voxrk~vo~wozrk}umwoxoxo{sk~um~umyqi{sk{sk******+,,+,,,,,,,,-........00/000000221222322332432432321321764865532542864642752853863;85;85<95=95=:6>:6?:6A=8B=8C>9F@;FA;GA;JC=MF?MF?NG?OG@PH@NF>OG?RIARIAQH?QH?RI@RI@ULBVLBTJ@WMCZPE[PE\PE_SG`TH[PDYNCWLA[OCVK@\PD]QE\OCZNBaTGcUHn_OfXI~kYάtts~}}vvurrrqrroqsilnbfjX]bOUZIOTIOUEKQAGL@EJ<@E8:6>:6A=8B=8C=8E@:F@;GA;IC=JD=MF?NF?NG?ME>KDOG?OG?NE=NF=OF>MESJ@QH>TJAWMCZPEYNC\QEZODVK@VKAWLAUJ?VK@YNBZNB[OCYMB`SFbTGm^OgYK~lZͬyxwxwwstunpsgkn
W[_TZ_NTZHNTFMTELS?EK:@E9>C7<@7;>68;468556555:98<;9A?:6>:6A<8B=8B=8C>8F@:F@;GA;HA;JD=KD=LE=LE>KCNF>JC;KC;LC;JB:JB:MEQH>QH>TJAWMCVKAVLATJ@UJ@VK@TI?RG=RH=XMBTI>WLAXMA\PDaTGk]Nn_PwfUsݰ}}|stuoru^bfY^cU[aLSZJQYELTCJQ>EK:@F7=B9=B59=58<58:467456777;;::86A>:6A<7A<8B=8C=8E@:F@:D?9E?9HA;KD=KD=LE=ME>KDKC:QG>VLBTJ@UKASI?TJ?TJ@UJ@SI>QG=RG=UJ?SI>TI>XLA[OC]PDgYKi[Mn\wb޷wxzhko_chZ`fRY`MU\KS[FNVDLT=EL9@F5;A5:?38<59<58;79<579234888;:9;:8<97EB>LHCRLGYSMYSM`ZUc]Wic^d_[b^Yd`\b_[c_\eb_ec`db_ca_^\Z]\Z\[Y\ZY]\Z^]\]\[_^]]\[`_^__^\[Z]\\]\[]\[YXXWVVUUTSSRSSRSSRSRRQQPQQPPONNMLNMLNMLNMLNMLNMLMMLMMLMMLMLLMLKKKJKKJKJJKJIKJIKJIMLKMLKKJIKJILKKLKJLKJLKJNMLNMLNMLNMLMMLMMLMLLONMONMONMQPOQPOQPOQPOQPOQPORPORQOTRQTSQUSQWUSXUSZXU]ZW]ZW^[X_\Xa]Yb^Yc_Ze`Zga[kd^mf_rjbzpg~siuiyls}{}}}}}}}{vqqzlzlzlpp|nykvhuh,,-....../00.....////000100110111111211221221321321432653654875764864864753853863964974:74;74;84<85<95=95>95@<7A<7B=8B=8E?:C>8D>9E?9GA;HA;KD=KD=HA:HA:IB:G@9H@9HA9IA9IA9LD;JB:KB:SI@SJ@TJ@RH?PF=SI?TI?RH>PFWK@ZOC[OC`SFgZLqaR~lZ~~~tvxhko`ejRX^RY`NV^IQZFNWBJS?GO9@G8?E68D>8D>9E?9E?9F@9IB;G@:HA:F?8G@8G@9H@9HA9G?7IA9JB:OF=TKAPG>NEOF=RI?SI?QG=OE\PDZOC^RFfXKhZLm\|gṑnpsimr\agU\cPW_NW_IQZCKT@IQ?GO9@H58D>8D>8C=7F?9F@9G@9G@9F?8F?8G?8E>7C<5F>7IA9IA9QH?OF=OF=NEOERH?PG=NE;OEQG=RH=XMBVK@ZNCdWJcVIwgWxgVx{{{oqsfjn\bgW^ePX_JRZFOW?GO>FO:CK9AI549?49=48<47;69<469345344555332421641=:6GB=OIDXRK[UN`YSd^Xf`Zd^Yfa\fa]id`gc_lhdheaeb_`]Z_]Z`^[^[Y[YW[YVZXV\ZX][Y_][\[YZXW\ZX]\Z[ZX[YX[YW][YYWUYWUYWUWUSUSRSQPSQPSQOQONQONQOMOMLOMLNMKNMKNMKNMKNMKNLKNLKLJILJILJIKJIKJIMLJMKJMKJMKJKIHKIHJIHJIHLKJLKIMLKMLKMLKMLKMLKOMLOMLOMLPONPONPONPONPONQONQONQONQPNSQPSRPTRPVTRVTRYVTYWT\YV]ZV^ZW_[W`\Xa]Xc^Ye_Zhb\jd]me^tkcypf~shthykpŪ§{ytlXMC  
5/)G?7VKA`TH`THcWKfZMi\Oh\O//////...//0/00000000000000100110110211211432432654543765865653754764:85864974974:75;85;85:74:74;84<84<84?;6?;7@;7@<7A<7B=7D?9E?:C>8B<6B=7C=7D=7F@9E>7E>8C=6D=6D=6E>7E>7H@9IA9KC;NF=OF=OF=PG>NEQH>OF=MD;NE;QG=OEPFGP:6@<8A<8A=8B=8E?:C>9D>9B=8A;6A<6B<6C=7C=7D=7B<5@:4A:4C=6B;4D=6C<5C<5MDPG>OEOFXMBWLAUJ?XMB\QEaTHl]OyhXwbԮuuvnpsgkp]ciSZaMT\GOWCKT>GO=FO;DM8AI7?G29@18?17=28=17<16;159158369,.0,./,-.00000/31031/852?;6JE?XQK]VPb[Tf_Yc]Xf`Zje_lgbkfakgbniemidhd`a^Z^[X`\Ya^Z^[X^[X][X]ZW`^[^[Y^[X\YVZWUYWT[XV[XV\ZWZXUZXUXVSXVSXUSTRPRPNRPNRPNRPMQOMONLOMKMLJMKJMKIMKIMKIMKILKILKILJILJILJILJHKJHMKJMKJMKJMKJLKJKIHJIHJIHLJILJILJIKJIKJIMLJMLJNMLNMLNMLNMLPOMPOMPOMPOMPOMPOMPOMQOMRQOSQOSQOSQOVSQVTQWTRYVS[XU\YV]ZV^ZW_[Wa\Xb]Xd^Yf`Zjc\le]qh`vmc|rgsgwj}m    001////////////00/000000000100100110322332544654654754764643653753975975864964974:74:75;85;85<85<95=96>:6@<8A<8A=8B=8D?:C>9C>9B=7@;6A<6B<6B<6@;5A;5A;5>82>82<60=71B;4B;4E>6JB:JB:MEWLBUK@TI?TJ?SH>\PD`TGk]O|kZ~h׳nqthlq[`fQX^LS[IQZBKSAJR;DL;CL6>G8@H5=E19@/6<06=05;05:/48/37148*-/*,.*+-,--,,,.--21030.52/<84JD?VOH]VOa[T_YRc]Wc^Xjd_je_id_lgbnjekgbhd`b^Z_[Xb^[`\Y]ZV_[X^[X^[Wa^[_\Y_[X\YVZWUZWT\YV]ZW]ZW[XU[XUYVSXVSVTQVTQTRORPNRPMRPMROMROMPMKOMKMKIMKIMKIMKIMKIMKILKIKIGJIGJHGJHGJHGKJHKJHKIHKIHKIHLKIJIGJIGLJILJILJIKJIKJIKJHMKJMKJNMKNMKNMKPNMPNMPNMPNMPNMPNMPOMPOMPOMRQOSQOSQOUSQVSQTRPWTQYVS\XU]YV^ZV_[V`[Wa\Xc]Xd_Yhb[jc\og_ri`xnd~sgvjzkw    
///////////////0//00/000000100100322322332544654875654543643653865975975864864964:74:74;85;85<85<95=95?;7@<7@<8A<8C>:D?:B=8A<7@:5@;6A;6?:4@:4@:5@;5A;5=72>82<60>82?92?92HA:IA:IB:LDNF=OF=MD;KB:KC:NETI?XMAYNB]QEk]Om\~hƧein\bgT[bMU\GOWENV@IRF3;B29A06=.4:-39.49/49.26-15/26*,/*,.*+,**+,,,,++21/20.52/<84JD?UOHZTMc\U_XRc]Whb[lf`ke_id^lgakfalgbgb^b^Z_[W_[W`\Xb^Zc_[c_[b^[`\Ya^Z]ZW]ZV[XT[WT\YV^[W]ZW[XU[XUYVSYVSWTQWTQVTQTRORPMRPMRPMROMROMPMKPMKNKIMKIMKIMKIMKIMKIKIGJIGJHGJHGJHFKJHKJHKIHKIHKIGKIGIGFIGFJIGJHGJHGJHGKJHKJHKJHMKJNMKNMKNMKPNMPNMPNMPNMPNMPNMPNMPNMPNMRPNRPOSQOUSPUSQTROVTQYVSYVS\YU]YU^ZV_[V`\Wb]Xc^Xe_Ykd]kd\ph_ulbyodthvhzk   ///////////////0//00/000000100221321322543543654865654542643864865975975863864964974:74:74;85;85<85<95?;7?;7@<7B>9C>9C?:B=8A<7?:5>94>94?94?:4@:4@:4@;5?93;50;60>82>82A:4F?7D=6F?8KCPG>NE=LD;JB9MD;MEOF=RI?SI?TI?RH>SH>SI?WLAXMB_SGgZLzjYwbǥZ_eRY_MT\IQXFOWCLU;CK8AI8@I6?G2:B.5=/6=.5;,28-28.39/49,04-15-03(*-*+-)+,(())))))(+*)20.41/962JD>UOHZSLaZSaZTc]Vjc]lf`ke_kf`je_kf`lgbgb^c^Y`\Wa]Ya]X`\Xb^Zc_[_[W_[W^[W^ZW\XU]ZV[XT]YV^[W^[W\YU\XU[XUXTQWTQWTQWTQUROSPMSPMRPMRPMROMROMPMKPMKNKIMKIMKIMKIMKHMJHKIGJHFJHFLJHLJHKJHKIHKIGKIGKIGJIGIGEJIGJHGJHGJHGJHGIHFIHFIHFKIHNLKNLKNLKPNLPNLPNLPNLPNLPNLPNLPNLRPNRPNRPNSQNUSPTQOTROVTQYVS[XU\YU]YU^ZV_[Va\Wb]Xd^Xf_Yjc\nf^qh`wmc}rfuivh}k   
///////////////0//00/00/000221221321322543543765865532542643864864975975975864964974:74:74;84;85<85<95?;7?;7A=9B=9C>9C>9@;6>95=83=83>94>94?94?:4@:4@:4>93=71;50=72>82E>7E>7C<6D=6HA9IA:IB:JB:LDRH>PF=PG=QG=RH>SH>TI?ZOC^RFfYKpaQm[tV\bMT[GOVEMUDMVF3;C08?-4:*17+17,27,28,16-27+04+/2(+.(*,)+-&'('()))))((+*)-+)41.73/JD>UNHZSLaZSc\Ue_Xjc]jd]ic]jd^ic]jd^ic^hc]a\Xc^Yb]Yc_Zc^Zb^Yd_[b]Y_[W]YU]YU^ZV\XT\XT[XT]YV]YU[WTZWSXURXUQXTQXTQWTQWTQUROSPMSPMRPMROMROMROLPMKPMJNKIMKIMKHMKHMJHKIFKHFJHFLJHLJHKIGKIGKIGKIGKIGKIGJIGJHGJHGJHGJHFJHFIHFIHFIHFIHFKIHLKILKINLKONLONLONLONLPNLPNLPNLRPNRPNRPNRPNURPSQNTQOVSPXURYVS\XU]YU^YU_ZV`[Va\Wc]Xe^Xib[iaZog_ri`xncsgugyi///////////////0//00/00/211221221321543543543765865532542764864864864975975:75:85964974:74:74;84;85<85@<8A<8A=9B=9B>9C>9?;6>94<83=83=83>93>94?94?94?:4>83<71:5/=71?93D=7B<5C<5E>7H@9HA9IA9IA:LD;LDOFUK@YNC]QEbUHrcTtdTmX[]JRYEMU@HP?GP:BJ6>F5>F3:B19A/6=+28)/5+17+17+06*/4*.3+/3+.2&),&(*')+$%&%&&'''(((*)(*)'1/,73/HB94<73<83=83=83>93>94?94?94;61<61<61<71?93C=6B;5B;5E>7E>7E>7H@9JC;KC;KC;LC;LDQG>QH>RH>SI?SI?WLAXMB[PE]QEm_QqbS|jXqIQX@HP?GO8@H5-4;(.5(/5(-3*/5*05*/4*.3)-1&)-$')%(*$%'#$&%%&$%%$$$('&*('/,*73/MF@UNG\UM\UNc[Tg`Yle^hb[ic\jd]ic]kd^hb\ic]d_Yd^Ye`Ze_Zd_Zb]Xa\W_ZU]XS\XS\XS\WS]YT_ZV[VRZVRZVRZVRXTPWTPURNUQNWSOVSOVROVROURORNKQNKQNKQNKPNKPMJNKIPMJNKHMKHMKHMJHKIFKHFKHFLJGLJGLIGKIGKIGKIGKIGKIGJHFLJHJHFJHFJHFJHFIHFIHFIGFIGFIGFJIGJIGLJINLJNLJNLJNLJNLJNLJNLJNLJROMRPMRPNRPNSPNSQNTQNVSPXURYVR[XT\XT]YU^ZU`[Va\Vc]Wd^Xf_Yf_Xme\pg^si_|pdreyj///////////////0//111211211211221433443543765543653532754754764864864964974:75:75:85;85;85<95<96=96?;8@<8@<8@<8?;7@;7@<7?:6?:6<73<73<83=83=83>83>93>94=72;60;61@:4B<6C<6A;4A;5D=6D=7B<5G@9JB;JB;JC;KC;KC;NE=LD;JB:H@8H@8IA8IA9LC;LC;LD;OF=RI?SI?QG>QH>RH>RH?SI?VLAWMB[PD]QEoaRxgWu`j=DL;CK6>F4UNG\TM^WO`YRg`YhaZib[le^kd]jc]ic\lf_jc]e_Yd_Yhb\gb\c]Xb]X`[V`[U_ZU]XS]XS^YU^YT^YT_ZV[WRWSNXTPXTPXTPXTOUQNUQMUQMTQMTQMTPMTPMPLIQNKQNKQNJPMJPMJNKHNKHLIFLIFKIFKIFKHFKHFLJGLJGLIGLIGKIGKIGKIGKIFJHFLJHLJHJHFJHFJHFIHFIGFIGFIGEIGEJIGJIGJIGLJHNLJNLJNL
JNLJNLJNLJNLJPNLPNLPNLRPMRPNSPNSQNTQNXURXURYVR\XT]YT^YU_ZU`[Vb\Wc]We_Xib[g`Xne]qh^wla{ocqdvf!!"!!" !! !!///////////////110111211211211432433443543764543643532753754764864864964974974:75:85;85;85<85<95>;7?;7?;8@<8>:6?;6?;7@;7>:5?:5;72<72<73<83=83=83>83>93<72;50;60?:4D>7B<6@:4C<6C=6D=6B;5B;5IB:IB:JB:JB;H@9KC;ME=KC;IA9G@8JB:JB:H@8KC:KC:LC;QH>QH?RH?PF=NEUNF\TM^VOe]Vg`Xf_WibZjc\ib[ha[le^mf_jd]f_Yga[kd^f`Zd^Xa\Va[V`[U`[U`ZU_ZU_ZU^YT^YT^YTXSOWSNYTPYTPXTPXTOVRNUQMUQMUQMUQMTQMTPMTPLPLIOLIOLIOLIOKHPMJNKHNKHLIFLIFJGDIGDKHFKHFLJGLIGLIGKIGKIGKIFKIFLJHLJHLJHJHFJHFJHFIHFIGEIGEIGEIGEIGEJIGJHGJHGNLJNLJMLJNLJNLJNLJNLJPMKPNKPNKPNLQNLQNLSPNSQNVSPXUQYURZVR\XT]YT^ZU`[Ua[Vb]Wd^Wf_Xf_WkbZmd[pg]wk`wk^}oaq###$%%$%%&&'#$%!"# !!"#!"#!""!""!""  //./////////110110110211211432432432443543543543643532653753754864864864974974:74:75;85;85;85<95>;7?;7?;7>:6>:6>:6?;6?;7>95>:5=84;72<72<72<83=83=83>83<71:50=72C=7C=7D=7B<6B<6C<6C<6A;4A;4D=6HA:IA:G@8G@9JB:LDRH>WMBVKAWLB[ODj\OufWtdTkn[18?.6=.6=*18(.5$*0#)/$)/%+0'-2',1%).#'+!$ #" "! ! !""!!!#""#! *'%61-KD=RKDYRJbZRg_Wg`Xf_Wkd\jc[g`YibZle^ib[kd]hb[le^ic\g`Zb\Vb\Va\Va[Uc]W`[U`ZU_ZU_ZT_YT\WRZUPVQMXSNYTPYTPXTOVRNVRMVQMUQMUQMUQMTPMTPLRNKPLIOLIOLHOLHOKHNKHLIFLIFJGDJGDIGDKHFKHEJHEJHEJHEJGEKIFKIFKIFLJHLJHLJGLJGJHFJHFJGEIGEIGEIGEIGEIGEIGEJHGJHFJHFMLJMKJMKINLINLJNLJOMKPMKPNKNLJOLJQNLQNLROLROLVSPXUQYURZVR]XT^YT_ZU`[Vb\Vc]We^XibZg_XlcZoe\rh]vk_wj]xi[&''&''(()'()'()'()'((%&&#$$!""!""!""!"" //.//.///100110110110210211432432432432321543543643653653753753864864864964974:74:74:75;85;85<85>:7>;7=96=96>:6>:6?:6?;6>95>95<83=84;72<72<72<72=83=83;61:50<71B<7C=7C=7C=7D=7B<5B<6C<6C<6C=6C=6B;4F?8IA:KCKC:KC:NE:7<95=95=95=:6>:6>:6?:6=95>95>95<83=83=84<72<72<72;61;61;61>83B<6B<6C=6C=7C=7C=7B;5B<5B<5C<6A:4A;4A;4F?8JC;KC;KC;IA:IB:IB:JB:JB:JB:JB;IA9IA9KC;IA9LC;LD;LD;MDNENEQH?TJAWMCXMCVLBYODZOD^RGbVIqcTrcTq_jgWOF$).#).$).$).#(-"&* $!! " ! 
""!&$"2.*E>8RJC`WO`WOd\Tg_Vh`WkcZjbZkc[me]nf^pg_me]qh`ld]g`Yg`Xb[Ub[Tc]Vg`Yf`Yf_Yd]Wc]Va[U`ZT`ZT^XR]XR]WRYSNVQLXSNYTOWRMWRMVRMVQMVQMUQMUQLUPLTPLTPLRNJPLHOLHOKHOKHNKHLIFLIFJGDJGDJGDIFDIFCIFCIFCJGEJGEJGDIGDKHFKHFJHFLIGLIGJHEJGEJGEIGEIGEIGEIGEIGEIGEIGEJHFJHFJHFMKIMKINKINKINKIPMKPMKNLINLIPNKQNKQNKROLTQMTQNWSOWTPXTPYUP^YT_ZU`[Ub\Vc]Ve^WiaZg_WlcZne[pe[vj^tg[l_S,,-,,-)*+'(()**++,++,))*()*()*&'(&'($%&##$!!"//.//.100100110110110322332432432432321542543543643643653753753753864864864964974:74:74:74;85=:6=:6<85<95=95=95>:6>:6>:6=94=94>95<83<83=83=84=94<72:51:61?94?:4?:4B<6D>8B<6C<6C=6C=7C=7B;5@:4@:4@:4@:4C<6E>7H@9JB;HA9JC;HA:IA:IA:IB:IB:JB:JB:JB:F>7F?7IA9IA9IA9NE=NE=QH?QH?QH?QH?TKATKAUKAUKBSJ@XND[QF\QF\QG_TI^SG_SH`THfZMsdVvgWq_~i~h|e}iVE?9#$&!! " #"!*(%50+G@8ZQHYPH`WNf]Td\Sg_Vmd[iaXmd\ne]og^of^ld\riaog_kc[jbZg`Xc\Ud]Vf_Xf_We^Wc\Ub\U`YSa[Ua[T_XR^XR^XR]XR[UPYSNZUOXSNYTOWRMWRMVRMVQMVQLUQLUQLUPLTPLRNJRNJPLHOLHOKHOKHNKGLIFJGDJGDJGDIFCIFCIFCIFCHFCHFCHECHECKHFKHFKHEJHEJHEJHEJGEJGEIGEIGEIGEIGEIGEIGEIGDIGDJHFJHFJHFMKINKINKINKIOMKPMKNLINLIOLIQNKQNKQNKROLTQMVSOWSOXTPYTPZUQ[VQ^XSa[Ub\Vd]Wf_Wf^WjbYmdZndZth]laVreXpaR122122/00//0//0,-.,-.*+,*+,()*$%&##$##$//.100100100110110110322332432432321321542542543764864653753753753863864864964964974:74:74<96=96;85<85<95<95=95=95>:5>:6<84=94=94>95<83<83=83=83=83<72:50>94@;6?94A;6C=7B<6B<6B<6B<6C=6C=6A;5?93@93@:3D>7D>7E>7E>7E>7G@9JB;H@9HA9HA9HA9IA:KC;KC;IB:IB:JB:JB:LDRI@PG>SJ@SJASJATJAVLCVMC[QG\QG_TI]RG]RG^SH\QF_TH_TI`UIdXKbVJn`RtfWp`raomsm~jzg}m\yiZteWk^Rk_Sj_Sj_Tg]Rg]Sg]Te\SmcZnd[lcZpg]qh_pg^pg^qh_qh_ne]kc[kc[md\h`Xg`Xe]Vd]Vd]Uc\Uc\UaZS`YS^WQ]WQ]WP\VP\VP]XRYTN[UOZUOXSMVQKWRMWRMVQMVQLVQLUQLUPLTPLTPLRNJRNJOLHOKHOKGOKGNKGJGDLIEJGDJFCIFCIFCIFCIFCHFCHECHECIGDIGDIFDJHEJHEJHEJGEHFCHFCIGEIGEIGEIGDIGDIGDIFDIFDIFDJHFJHFNKINKINKIOMJPMJNLINLIOLIQNKQNKQNKROKROLTQMWSOXTOXTPYUPZUQ[VQ]WR^XR_YSc\UiaYg_WlcZlcYi_UlbWlaUgZN344344112112112012011011011,--*++())000100100100110110322322332432210321321532542542764864653753753753853863864964964974:74:74<96;84;85;85<85<95=95=95=9
5>:5<84<84=84=94=94<73<83=83=83;62?:5@:5B<7@;5D?9C=7C=7A<6B<6@:4B<6B<6@:5A;5A;5C=6C=7D=7D=7F?8F?8F@9IB:IB:G@9E>7G@9JB;JB;HA9HA9HA9HA9IA9IA:G?8IA:KC;LD8D>9C=7A;5A;6C=7D>7D>7B<6B<6D>8G@9G@9G@:E?8E?8F?8F?8F?8F?8F?8F?8G@9E>7E>7E>7E>7G@9G@9H@9H@9H@9F?7F?7HA9KC;KC;OG>QI@TKBTKBTKBTKBTKBUKBRJASJAULCWNDZPFZPFXNEXNEXOE[QG[QG[QG[QG\QG\RG^TI_TI_TIbVKbWK`UJ`UJaVJaVKbVK`UIeYMfZNnaSqdVufXvgY|l]qatd{jvfufuguhzmawlavk`vlaukatj`of\lcZiaXi`Xh`Xh`Wg_Wg_Wb[SbZSaZS_XQ^WQ^WP]WP]VP\VPXRLYSNYSMVQKTOJTOIUPKUPKTOKTOJTOJUPLUPKTPKRNIRNIQMIQMIOKGMIENKGLIELIELHEJFCIFCIFCIFCIFCHEBHEBJGDIGDIFDIFCIFCIFCHFCHFCHFCHECHECHECGECIGDIGDIFDIFDIFDIFDIFDIFDJHELJGOMJPMJNKINKINLIOLIQNKQNKQNKRNKROKUQMWSOXTOYTPZUP[UP\VQ]WQ^XR`YS_XRaZRaYQc[Rf]Sg]Sj_Tl`TfZN778777555555455455445667445100100100100110321321322432210210320320532542542754764643653753753753853863863963964;85;85:74:74;74;84;84<85<85<95=95=95;73<84<84<84=84=84=94<73<73<83>94A;6A<6C=8A<6D>8D>8@;5@;5C=7C=7C=7C=7A;6B<6F?9F?9F@9F@9F@9G@9G@9G@9G@9C=6E?8C=6D=6D=6D=6D=6D=7D=7D=7D>7E>7G@8G@8G@9IB:G@9G@9G@9JB:NF>NF>NF>NF>NF>QH@QH@QH@ULCWNEZPGZPGZPGZPGXOEVMDVMDVMDVMDWMDWMDWMDWNDULBULBULCVLCZPF]RH]SH]SH`UJ`UJaVJaVK_TI`UI`UJcWLfZNgZNg[Nk^QseWyj[zk\}n^~n_qa|n_~pbyl_xl`sh]qg]lbYkaXjaXg^Vf^Vf]Uc[Sb[SbZSaZSaZR^WP^WP]WP]VP\VOXRLYSMWQKVQKTOITNISNIUPJTOJTOJTOJSNJSNJRNIPLHRMIQMIQMIOKGOKGNKGLIELHELHEJFCIFCIFCIFCIEBJGDJGDJGDIFDIFCIFCIFCIFCHFCHFCFDAHECHECHECGECGEBGEBIFDIFDIFDIFDIFDIFDJHELJGNKHNKINKINLIOLIQNJQNKQNKRNKROKUQMUQMXSOXTOYTPZUP[VPZUO\VP]WP_XQ^WP^WO`XPe\Se\Rf\Rj^Sj^Rj\O999999777677667445445100100100100321321321321210210210320320532542542754764642643753753753853853863863963;85974:74:74:74;74;84;84<84<85<85=95=95;73<83<84<84=84=84=94<72>94>94@;6@;6B=8C=8A<6A<6@:5@:5B<7B<7B<7B=7C=7C=7E?9E?9E?9F?9HA:F?9F?9F@9F@9D>7G@9G@9E>8C<6C<6C<6E>8G@9G@9G@9F?8F?8F?8HA:HA:HA:HA:HA:HA:HA:ME=ME=ME=ME=OG?OG?OG?OG?OG?QIATKBTKBTKBTKBTKBRIARIARIARJARJARJASJASJASJASJASJASJATJAVMCXOEXOEYOEYOEYOEYOEZPFZPFZPF]RH`UJ`UJcWLfZNfZNeYMj]QobUseWqdVpbTvhY}m]}n^{l]yj\zl_sh\ndYj`Wh_Vg^Vd\Sc[Sc[SbZSbZRaZR^WP
^WP]WP]VOXRLZTMWQKWQKVQKTOITNISNIUPJTOJTOJTOJSNJSNIQLHPLGPLGQMIOKGOKGOKGNJFLHELHELHDJFCIFCIFBIFBJGDJGDJGDJGCIFCIFCIFCIFCIFCHFCHECFDAFDAFCAHEBGEBGEBGEBGEBIFDIFDIFDIFDIFDLJGLJGKHEKHEKHEMJGQMJQNJQNKRNKRNKSOKUQMTPLVRMWRNXSNYTOZUO[UP[UN\VO^WP]VO_WPaYPf]Sg]Sj_Ti^RfZM99999989988988966710/100100100321321321210210210210310320532532542754764642642753753753753853863863:85;85963:74:74:74:74;74;84;84<84<84<85=95;73;73<73<83<84<84=84?:5A<7=94@;6@;6B=7B=7?94A;6A;6?:5A<6A<6B<6B<6B<6D>8D>8D>8E>8E?8G@:E?8E?8E?8C=7E?9F?9HA:F?9F?9F?9D>7D>7D>7D>7D>7D>7G@9G@9G@9G@9G@9G@9G@9G@9IB;IB;MF>KDNF>NF>NF>PH@RJARJARJARJARJARJBPH@PH@PH@QH@QH@QH@QH@QI@QI@QI@QI@RI@RI@TKBVMDVMDWMDWMDWNDWNDXNDXNDXNE[PF_UJ`UJbWLcWLeZNh\Pi\Pi]Pl_Sk^Qi]Pj]Pn`SoaSn`RrcU}m]}n^yj\th[k`Vj`Vf]Td\Sc[Sb[SbZRaZR_WP^WP]VP]VOXRLXRKWQKWQKVQKTOITNISNIUOJTOJTOJTOJSNISNIQLGPLGPKGOKGOKGOKGMIELIEJGCLHDLHDJFCIFBIFBJGDJGDJGCJGCJFCIFCIFCIFCIFCIFCHFCFDAFDAFDAFCAFCAHEBGEBGEBGEBGEBIFDIFDIFDIFDLJGKHEKHEKHEKHELHEMJGNKGNKGPMIQMISOKTOKSNJUQLVQLWRMXRMYSNZTN[UO]VO_WP`YQ`XPe\Se\Rf\Qj^Sj^QgZL>>>::;::::::10/10/100321321321321210210210210310320320532542754754642642642753753753853853863:75963963963:74:74:74;74;74;84;84<84<84<84;73;73;73<73<83<83<83>:5@<7?:5A<7C>9A<7@;6@;6>94@;6@;6A;6A;6A;6A<6A<6E?9D>8D>8D>8D>8F@:F@:F@:D>8E>8G@:G@:E>8E?8E?8E?8E?8E?8C=7C=7C=7C=7C=7D=7D=7D=7D=7F?8F?8HA:JC95>95@;6@;7@;7B=8A<7A<7A<7A<7@;5D>9B=7B=7B=7B=7D?9FA;GA;E?9E?9E?9GA;E?9E?9D>8F?9F?9F@9D>8D>8D>8D>8D>8D>8D>8D>8D>8B<6B<6B<6C<6C<6E>8E>8E>8E>8G@9G@9G@9IB;IB;KD=KD=KD=KD=KD=KD=KD=KD=MF>MF>MF>OG@OG@OG@OG@OG@OH@MF>MF>MF>NF>NF>NF>NF>NF>PH@PH@PH@PH@QH@ULCULCULDUMDXNEXOEXOEXOF]SI]SI]SI`UKbWMbXMcXMcXMdXMdYMg[Og[Og\Ph\Pi\Pi]Pj]Qk^Qk^Ql_Ri\Oj]Pl^Pm_QoaSvgXwiZqdXh^Tf]TbZR_WP^WO^VOYRLYRKXRKWQKWQKWPJTNITNHSNHUOJTOITOISNIUPKQLGPLGPKGPKGOKFOKFOJFNJFNJFJFCJFBJFBIFBIFBKGCJGCJGCJGCJFCJFCIFCIFCIFCIFCIEBHEBGDAFDAFCAFCAFC@FC@FC@HEBGEBGEBGEBIFCIFCIFCKHEKHEKHELHENJGNJGNKGNKGOKGOKHRNIRNJSNJTOJVQLWRMXSMYSMZTN\UN]VO_WPaYQaXPcZQe\Rf\Qg\QbWLeWJ<<<10/10/32132132121021021021021031031032032053253275375376
4642642742752753753974:74863863963963963:63:73:73:74;74;74;74;84<84<84<84;73;73;73=94=95?;6?;6@;6B=8@;6@;6@;7A<7A<7C>8A<7A<7A<7B<7B<7F@:F@:F@:F@:D>9F@:F@:E?9C=7C=7E?9E?9C=7C=7C=7C=7C=7C=7C=7C=7C=7C=7C=7C=7E?9E?9E?9C=7D=7D=7D=7E?9E?9E?9F?9F?9F?9F?9GA:IC8E?:E?:E?:E@:E@:E@:F@:D>8B<7D>8D>8D>8B<7B<7B=7B=7B=7B=7B=7B=7B=7D>8D>8D>8D>8D>8D>8D>8D>8D>8D>8D>8F@:F@:F@:F@:F@:F@:F@:HB;HB;HB;HB;HB;LE>LE>LE>LE>LE>NG@NG@NG@NG@NG@PIAPIAQIAQIAOG@OG@OG@OG@OH@OH@OH@OH@PH@PH@RJBVMEXOGXOGZQH[QH[RH[RH[RI\RI\RI\RI\SI]SI_UK_UK_UK`UK`VKaVKaVKaVLdYMdYNeYNeZNfZNfZNg[Og[Oh\Oi\Pi]Pj]Pk^Ql_Qk^PpaSrcTpaRwgXl`TcZQ]UNZRKYRKXQKXQKWQJUOHTNHTNHWQKWQKTOITNISNIQLGQLGPLGPKFPKFOKFOJFOJFNJFLHDLHDNIEKGDIFBIEBIEBIEAHEAHEAJFCJFCIFCIFBIFBIFBIEBHEBGD@FC@FC@FC@FC@FC@FC@FC@FC@HEBHEBHEBHEBKHELHELHELHENJGNJGNKGOKGOKGPLGPLHSNJSOJTOJUPJVPKYSMZSM[TN\UN^VO_WPaYPaXPcZQcZPf\QaVK10/32132110/21/210210210210210310310310320532532753753753642642642742752964964974853853863963963963963:63:63:73:73:73;73;74;74;84<84:62<84<84<84>:6?:6A<7?:6?:6?;6?;6@;6@;6B=8B=8@;6>95@;6B=8D?9D?9D?9E?9E?9GA;E?9C=8C=8C=8C>8C>8A<6A<6A<6A<6A<6A<6C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8E?9E?9E?9E?9E?9E?9E?9E?9E?9GA;GA;GA;ICKD>KD>MF?MF?MF?MF?MF?MF?OHAOHAOHANF?NF?NG?PHAPHARJBRJBTLDTLDVNFXPG[RI[RI[RI[RIYPHZQHZQHZQHZQHZQH[QH[RH[RH^TJ^TJ^TJ^TJ_UJ_UJ_UK`UKbWMcXMcXMcXMdYMdYMeYNeZNfZNgZNg[Oh[Oi\Oj]Pk]Pn`RoaSoaRqbTpaRrcTi^S\TL[TMZSLZSLYRLWPJVPJTNHUOIUOITOITNISNIQLGQLGPKFPKFPKFOKFOJFOJFNJFLHDNJEMIEMIEIFBIEBIEAIEAHEAHEAHEAHDAIFBIFBIFBIEBIEBGD@GD@GC@FC@FC@FC@FC@FC@FC@FC@FC@HEBHEBHEBLHELHELHENJGNJGNKGOKGOKGPLGPLHSNISNJTOJUOJUPKVQKWQKZTN\UN]VO_WO^VN`XObYP`WNcYO32132132110/21/21/210210210210310310310320531532753753753642642642742864964964752852853853863963963963963:63:63:73:73:73;73;73;73;74;74=95<84<84@;7>:5B=9@<7@<7@<7A<7A<7A<7A<7A<7A<7@;6>94C>9D>9D>9D>9D>9F@;F@;F@;D?9B=8B=8B=8@;6A;6A;6A;6B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7B=7D>9D>9D>9D>9D>9D>9D>9D>9D>9D>9D>9F@:F@:HB:A=8B=8B=8@;7@<7B=8B=8B=9A<7A<7C>9A<7C>9C>9C>9C>9E@:E@:E@:E@:C>9A<7B<7B<7B<7B<7B<7B<7B<
7B<7B<7B<7B<7B<7B<7B<7B<7B<7A<7A<7A<7A<7A<7A<7C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8C>8E?9GA;GA;GA;GA;GA;GA;GA;GA;GA;GA;KD>KD>KD>KD>QIBQJBQJCQJCSKDSKDUMFSLDSLDSLDSLDTLDVNFVNFXPGXPGXPHXPHYPHYPHYPHYPHYQHZQHZQHZQHZQH[QH]SJ]TJ]TJ\RI\RI\SI]SI]SI]SI^SI`VK`VKaVKaVLbWLbWLbWLcXLcXLdXMdYMeYMfZMfZNg[Nh[Nk^Pl^QoaSqbTn`RqbSqbRi]P]UMXQJUNHTNHVOIUOIUOITNITNHRLGQLFQLFPKFPKFPKFOKFOJEQLGPKGNJENIENIEMIEKGCKGCKGCJGCIEAHEAHDAHDAHDAHDAIFBIEBIEBGD@GD@GC@GC@GC@GC@GC@GC@GC@GC@GC@GC@HEBJGCJGCKGCNJGOKGOKGOKGPKGPLGQLGSNITOJTOJUPJVPKWQKXRKYRLZSL\TM]UN]UM_VM32110/10/20/21/21/21/21/210210310310310310320531753753753753642642863863863964752752852852852852852963963963963;84;84;84<84<85<85<85@<8@<8@<8?:6A<8A<8C>9C>:A=8A=8A=8C>:C?:C?:B=8B=8B=8F@;D?:@<7@<7B=8B=8D?:D?:B=8FA;D?:D?:C=8A<7A<7A<7A<7A<7A<7A<7A<7A<7A<7A<6A<6A<6A<6A<6A;6A;6A;6A;6@;6@;6B=8B=8B=8B=8B=8B=8B=8B=8B=7B=7B=7B=7B=7D>9F@:F@:F@:F@:F@:F@:F@:F@:HBGC@GC@GC@GC@GC@GD@KGCKGCMIEOKGOKGOKGPKGPLGQLGQLHTNITOJUOJVPJWQKXQKYRLZSL[TM]UM\TL10/10/20/20/21/21/21/21/21/210310310310310320531753753753753642853863863863742742752752852852852852852:74:74;74;74;84;84;84;84<84?;7?;7@<7>:6>:6@<8B>9D?;D?;A<8A<8A<8C>9C>9C>9C>9C>9C>9E@;E@;E@;B=8B=8E@;E@;E@;E@;E@;D>9D>9B=8B=8B=8B=8@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6@;6A<7A<7A<7A<7A<7A<7A<7A<7A<7A<7A<7A<7C>8C>8C>8E?:E?:GA;GA;HC=HC=JD>JD>JD>JD>JD>LF@NHAPICPICPICPJCPJCQJCQJCQJCSLDSLDUMFUMFSLESLEUNFVNFVNFVNFVNFVNFWNFWOFWOGWOGWOGXOGXOGXOGXPGYPG[RI[RI[RI[RI\RI\SIZQH[QH[QH[RH\RH\RH\SI]SI_UK`UK`UKaVKaVKbVKbWLcWLcXLdXLdXMeYMeYMfZMi\Oi\Oj]Pn`RoaSqbTo`RrcSaVKWPIVOIUOIUOHTNHRLFRLFQLFQKFPKFPKFRLGQLGQLGQLGPKGNIENIENIEMIDKGCKGCKGCKFBJFBJFBJFBJFBJFBIFBHD@GD@GD@GD@GD@GC@GC@GC@EB>EB>GC@GC@GC@GC@GD@GD@IEBMIEMIEMIEOKGPKGPLGQLGQLGRMHTOJUOJVPJVPKWQKXRKZSL[TL\UM10/20/20/20/21/21/21/21/21/21/310310310310310531753753753753641853853863863742742742752752852974:74:74:74:74:74;74;74;74;84=96?;7?;7?;7=:6>:6>:6A=9A=9C?:C?:B=9B=9D?;D?;D?;D?;D?;D?;HC>HC>HC>FA9C>9A<7A<7A<7A<7A<7A<7?:6?:6?:6?:6?:6?:6?:6?:6?:6?:6?:5?:5?:5?:5?:5?:5?:5@<7@<
7@<7@;7@;7@;7@;7@;7@;7@;6@;6@;6D?9D?9D?9D?9F@;GBIC>IC>IC>IC>IC>KE?KE?KE?KE?MGAMGAMGAOIBOIBOIBOIBPIBPIBPIBTLETMETMETMEVNGVOGTMFUMFUMFUMFUMFUNFUNFUNFVNFVNFVNFVNFVNFVNFWOFWOFYQHYQHXOGXOGXPGYPGYPGYPGZQHZQH[QH[QH[RH\RH\RI]SI_UJ`UK`VKaVKaVKbWLbWLcWLdXLdXLeYMeYMh\Oi\Oi\Ol_Qm_Rn`RoaSm_PqbSi[MWPIUOIUOHTNHRLFRLFQLFQKFRMGRMGRLGQLGQLGQLGOJENIENIENIDMIDKGCKGCKGBKFBJFBJFBJFBJFBJFBJFBIFBIEBIEBGD@IEAIEAGC@EB>EB>GC@GC@GC@GD@GD@GD@IEBIFBMIEMIENIEPKGPLGQLGQLGRMHRMHUOJUPJVPJWQKXRKYRLZSL10/20/20/20/21/21/21/21/21/21/31/31/310310310531753753753753753753853853863742742742742963963963963:74:74:74:74:74:74;74<95=95>;7>;7=95=95=96=96A=9A=9A=9C?:C?:A=9C?:C?:C?:C?:C?:D?:GB=GB=GB=GB=GB=EA:5>:5>:5>:5>:5>:5>:5>95>95>95>95>95@;6?;6?;6?;6?;6A<8A<8A<8A<8A<7A<7C>9C>9E?:E?:E?:FA:7<95>:7>:7<95=95=95=95?;7?;7@<8A<8B>:B>:C>:C>:C>:C>:C>:C>:FB=FB=FB=FB=FB=GB=HC>HC>GB=E@;C>:C>:A=8A=8A=8A=8A=8A=8?;7?;7?;6?;6?;6?;6?;6?;6?;6?;6?;6?;6?;6?:6?:6?:6?:6?:6?:6@<7@<7@<7@<7@<7@<7@<7@<7@<7B=8B=8D?:D?:D?:E@;E@;E@;E@;GB=GB=GB=GB=ID>ID>ID>ID>ID>ID>ID>ID>ID>JD>JD>JD>LF@LF@MGALF@MGAMGALF@LF@LF@MGAOICOICOICOICOICOICPICPICPICPICPJCPJCQJCOHBOIBPIBPIBPIBPIBQJBQJCSLDSLDTLDTLEUMEUMEUMEVNEVNEWNFWOFXOFXOFYPF[RH\RH^TJ_UJ_UK`UKaVKaVKbWLcWLcXLdXLeYMeYMh[Ok^Ql^Ql_Qm_Rn`Ro`Rm_Qn`Qo`QrbRRLEOICPJERLFQLFSMGRMGRMGRLGQLGQLFQLFOJENIDNIDNIDNIDLGBKGBKGBKFBKFBJFBJFBJFBJFBJFBJFBJFAIEAIEAHD@GD@GD@IEAIEAIEAIEAIEAIEAJEAJFBMIELHCLHCNJENJEOJEQLGRLGRMHSMHUOJVPJYRLZSL20/20/20/20/21/21/21/21/21/21/31/31/31/31/31/531742752753:86:86964974974974974:74:74:74:74:75:85;85;85;85;85;85=:6=:6=:6=:6?<8>:6<95<95<95<95>:7>:7@<8@<8B>:B>:D?;B>:B>:B>:B>:B>:FA=FA=FA=FA=FAID?GC>GC>FA:6>:6>:6>:6>:6>:6>:6>:6>:6>:6>:6>:6>:6>:5@;7@;7?;7?;7?;7A=8A=8A=8A=8A=8A=8D@;D@;D@;FAHC>HC>HC>HC>HC>HC>IC>IC>HC>HC>HC=HC=FA:6>:6>:6@<8@<8A=9A=9C?;C?;B=9B=9B=9EAGB>GB>GB=GB=GB=GB=B=9B=9@<7@<7>:6>:6@;7@;7@;7?;7?;7?;7?;7?;7?;7?;7?;7=95=95=95=95=95=95=95=95?;6@<8@<8@<8@<8@<8@<8@<8@<8B=9C?:EAHC>HC>HC>JE@JE@JE@JE@LGALGAKE@KE@KF@KF@KF@LF@LFALFAMGAMGAMGANGANHANHAOHBOHBOIBPIBPIBQJBSLDSLDTLDTMEUMEVMEVN
EWNFWNFXOFXOFYPF[RH\RH_UJ_UK`UKaVKaVKbWLcWLcXLbVKcWKfYMfZMgZNh[Ni[Ni\Oj\Ok]Ol^Ol^Pm_Pn_PrbROICPJDPJDOJDOIDOICPKEPJEQLFQLFOJDOIDNIDNIDNIDLGBKGBKGBKFBKFBKFBJFBJFBJFBJFAJFAJFAHD@HD@HD@JEAJEAKGCKGCJEAJFAJFAJFAJFBPKFPKFPKFPKGOJEQLGRLGTNIUOIUOIVPJ20/20/20/20/21/21/21/421421421421421642642642642:75;97;97:85<97:85:85964974974974974:74:74:74:74:74:74:74<96<96<96<96>;7=96=96=96=96=:6=:6?;8=:6=:6=:6?;8?;8C?:C?:C?:C?:A=9A=9D@:A=8A=8A=8A=8?;7?;7?;7?;7?;7?;7?;7?;7>:7>:6>:6>:6>:6>:6@<8@<8@<8@<8@<8A=9@;7@;7?;7A=9A=9C>:C>:C>:C>:D@;D@;D@;D@;D@;D@;D@;D@;D@;D@;D@;D@;D@;D@;D@;D@;D?;D?;D?;C?;C?;B=9A=9A=9A=9A=9C?;C?;D@FB>EAID?ID?ID?ID@JE@JE@JE@KE@KE@KF@LF@LF@LFAMGAMGAMGANHANHAOHBOHBPIBPIBQIBQJCRJCTLDUMEUMEVMEVNEWNFWOFXOFYPF[RH\RH]SI_UJ`UKaVK_TJ`UJ_TI]RG^SHaUJcWKfZMgZNh[Ni[Ni\Oj\Ok]Ol^Ol^Pm_Pn_Po`QVMDPJDPJDOJDOIDOICNICNICPJEOJDOJDOIDNIDNIDNIDLGBLGBKGBKGBKFBKFBKFBJFBJFAJFAJFAHD@HD@HD@JFAJFAJFALGCLGCLGCLGCLGCLHCNIEPKFPKFQLFQLGQLGPKERMGSMGUOI20/20/320420420420420420421642642642642642642642975=:8=:8=;8=;8=;8:85964964964974974974:74:74:74:74:74<96<96<96<96<96<96<96<96<96=96=96=96>;7>;7?;7=96?;7?;7B>:B>:B>:B>:B>:B>:D@GC>EA=EA=EA=EA=D?;C?;B>:B>:B>:@<8@<8@<8@<8@<8@<8A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9A=9@=9@=9B>:B>:B>:B>:D?;C?;C?;C?;C?;C?;EA:B>:B>:B>:B>:@<9@<9@<9@<9@<9@<8?<8?<8?<8?<8A=:A=:A=:A=:A=:C?;C?;C?;C?;DA=C?;C?;C?:7>;7>;7>;7>;7>;7@<8@<8B>:B>:B>:B>:EA=C?;C?;C?;C?;C?;EA=FB>EA:B>:B>:B>:A>:A=:A=:A=:@=:@=:?;8>;8>;8>;8>;8>;8>;8>;8>;8>;8>;8>;8?<9?<9?<9?<9?<9?<9A>;A>;@<9@=9@=9@=9@=9@=:A=:A=:A=:A>:A>:C?;EA=EA=FB=HC?HC?HD?ID?ID?JE@JE@JE@KE@KF@LF@LFAMGAMGANGAMF@MG@NG@NGAOHAOHAPIAPIAQIBRJBRJBSKBQIARJAULCWNEVMDWMDWNDXND[PF[QG\QG]RG]RG`UIaUJbVJbVJeYLfYLgZMgZMfYLgYLgZLhZMi[Mj[Mm^Om^OULDNHBPJDOICOICOICNICNHCNHCLFAMHBMHBNIDNIDNIDLGBLGBKGBKFBKFBKFBKFAKFAID@ID@ID@JFAJFAJFAJFAJFAJFALGCLHCLHCNIENJEPKFQLFQLGQLGRLGRMG320420420420532542542542642642642642642853853975<:8=:8=:8=:8=:8=:8:85:85964964964964964964964974;85;85;85;85;85:74<85<85<85<95<95<95<95>:7>:7>:7>:7>:7>:7?<8?<8A=:A=:A=:C?;D@FB>D@:B>:B>:@=9@=9@=9@<9@
<9@<9>;7>;7>;7>;7>;7>:7>:7>:7>:7?<8?<8?<8?<8?<8?;8?;8A=9@=9B>;B>:B>:B>:B>:B>:B>:B>:B>:C?;8>;8>;8>;9>;9>;9>;9><9><9?<9?<9?<9?<9?<9@=9@=9@=:A=:A=:A>:C?;C?ID?JD?JE?KE?KE?LF@LF@MF@MG@NGANHAOHAPHANG@OH@PH@PHAOG?PH@PH@SJBSKBTKBULBULCVLCWMCYOE\QG]RG`TI`UIaUJbVJbVJcWKdWKeXKeXKfYLgYLhZLhZMi[Mj[Mk\Mk]Nn_PNHBNHBOICOICOICNICNHCNHCLFAKFAMHBMGBNIDNICLGBLGBLGBKFBKFBKFAIE@ID@ID@ID@KFAKFAJFAJFAJFAKFAKFAKFAMHCNIEOJEQKFQLFQLFRLGRLG420532532532532542542542642642642753753753975975<:8=:8=:8=:8=:8=:8;96:75:75964964964964964964;85;85;85;85;85974;85;85;85;85;85;85;85;85=:7=:7=:7=:7=:6?;8?;8?;8@=9@=9@=9D@:EA=EA=C@:A>:A>:@<9@<8@<8?<8?<8?<8?<8?<8>:7=:7=:7=:7=:6=:6=:6=:6=:6>;8>;8@<9@<9@<9@<9A>:A>:A>:A>:A>:A>:A=:A=:A=:A=:B?;B>;A>;@<9?<9?<9?<9><9>;9>;9>;9<:7<97<97;97;97;97;97;97;97;97;97;86:86:86:86:86:86<:8<:8<:8<:8<:8;96;97;97<:8<:8=:8=:8=;8=;8=;8>;8>;9>;9>;9?<9?<9?<9?<9@=9@=9@=:B?;C?;C?;D@:C?;C?;EAHC>HC>ID?ID?JD?JE?KE?LF@LF@MF@MG@LF?MF?MF@NG@MF>MF?NF?NG?OG?PH@RJASJBTKBTKBULCULCVMCWMCYOEZPE]RG^SH\QF]RG^RG^SGcWKdXKeXKfYLfYLgZLhZLh[Mi[Mj\Mk\Nk]Nl]NNHBNHBPICOICOICNICNHCNHCLFAKFAKFAKFAMGBNICNICLGBLGBLGBKFAIE@IE@IE@ID@KFAKFAKFAKFAKFAKFAKFAKFAKFAMHCOJEOJEQLFQLFRLG532532532532532532542542642753753753753753975975<:8<:8=:8=:8=:8=:8;96:75:75863863963963963:85:85;85;85;85963963;85;85;85;85;85;85;85=:6=:6=:6=:6=:6=:6>;8>;8>;8>;8B>:B>:C?DA=C?;C?;C?;C?;B?;A=:A=:A=:?<8?<8?;8?;8?;8?;8?;8>;8>;8>;8=96=96=96<96<96<96>;7>;7>:7>:7?<9?<8?<8A=:@=:@=:@=:@=:@=:@=9@<9?<9?<9@=:><9>;9>;9=;8=;8=;8=:8<:8;97;86:86:86:86:86:86986986986986976976976976976976976976:97:97976976976976986986;97;97;97;98;:8<:8<:8<:8<:8=:8=;8=;8=;8>;9>;9><9?<9>;8>;8>;8?;8?<8@<8@<9@=9A=9A=9B>9B>:D@;E@;E@JD>KE?LE?JD>KD>LE>LE>MF?MF?NG?OG?OG@PH@RJBSJBTKBTKBULCVLCVMCWMCVLBXND[PF\QF]QF]RG^RG_SG_SGbVIcVJdWJdWJeXKhZLfYKgYKhZLhZLi[Lj[LOHBNHBNHBOICOICOICPJDPJDNHBMHBMHBMGBMGBNICNICNICNHCMHCLFAKFAKFAKFAMHCMHCMHCMHCMHCMHCMHCMHCMHCMHCMHCOJEQLFRLF532532532532532532542753753753753753753975975975<:7<:7<:7<:7=:7=:8;96:75:75:75863863863:75:75:75:85963963963:85;85;85;85
;85;85;85;85<96<96<96<96<96>;7>;7>;7>;7>;7>;7DA=DA=C?;B?;B?;D@=D@=EB>D@=D@=D@;B>;@=9@=9@=9@=9>;8>;8>;8>;8>;8>;8>;7>;7>;7>:7>:7?<9=:7=:7=:7=:7=:7=:7=:7=:7=:7=:7>;8@=9@=9@<9?<9?<9?<9>;9>;9<:7<97=:8<:8<:8<:8<:8;98:86986986976976976876876875875875875865765765765765765765765765765765765765765765865875875875:87:87:87:97:97;97;97;97;98<:8<:8<:8=:8:85:86;86;86;96<96<96<96=:7?<8?<8@<9@<9A=9A=9B=9B>:C>:C?:B=9D?:E@;E@;HBKE>LE>LE>MF?NF?NG?OG?OH@RJASJBSKBTKBSJASJATKAUKBULBVLB[PF[QF\QF]RF]RG^SG]QF]RF`THaUHbUHbUIcVIdVIdWIeWJfXJfXJgYKj[MQJCPJCPICOICQKDQJDPJDPJDNHBNHBMHBMGBMGBMGBNICLGBNHCLGALGALFAKFAMHCMHCMHCMHCMHCMHCMHCMHCMHCMHCOJEOJEPJE532532532532532753753753753753753753974974974974<:7<:7<:7<:7<:7;86;96;96:75:75863863:75:75:75:75863963963:75:75:75:75:75:74:74:74<96<96<96<96<96<96=:7=:7=:7=:7=:7?<8@=:D@=D@=C@;C@EA>C@;A>:A>:A>:?<9>;7?<9?<9?<9?<9?<9?<8?<8>;8>;8>;8>;8>;8=:7<:7<:7<97<97<96<96<96<96=:7=:7<:7<97<97=:8;97;86:86:86;97;97:97:97876876875875865765765765765765665665655655655655655655655655655655443443655655655655655655665665765765876976987987987987:87:97865865865875975975975:85:86;86;86;96<96<96=:7?;8?<8@<9@<9A=9A=9@<8A<8A=8B=9B=9A<8B=8D?9D?:E?:GAKE>LE>MF>MF?NF?OG?OG?RJAPH@QI@RIARJASJATJATKBUKBVLBVLBYODZODZPE[PE\PE\QF]QF^RF`THaUHbUHcVIcVIdWIdWJeWJfXJfXJgYKhYKQJCPJCPICPICQKDQJDPJDPJDNHBNHBMHBMHBMGBMGBMGBNICLGBLGALGALGALFAMHCMHCMHCMHCMHCMHCMHCMHCNHCOJDPJE531531531753753753753753753753753753974974974974<:7<:7<:7<:7<:7;86;86;86974974853:74:74:74:74863863863863:74:74:74:74:74:74:74:74<96<96<96<96<96=:7=:7=:7=:7=:7=:7@=:@=9@=9C@C?;8>;8>;8>;8>;8>;8>;8>;8>;8>;8>;8=;8=;8=;8<96<96<96;96;86;86:86:85;97:86:86875865865764764754875865654654765765665655655655554554554554554544544544444444444444444444333333333333444444544544544554554554554554655776776876544654654654654754764764865865875975975:75:85:86;86;96<96<96=:7?;8?<8>;7?;7?;8@<8?;7?;7@;7@<7A<7A<8B=8B=8C>8C>9GBLE>LE>MF?NF?LE>ME>NF>PH@QH@QI@RIAQH?QI@RI@SI@SJ@VLBWMBYODZOE[PE[PE\QE]QF]RF^RFaTHaUHbUIcVIcVIdWIeWJeXJfXJgXJgYKhYKQJCPJCPJCPICOIC
QJDQJDPJDNHBNHBNHBMHBMGBMGBKE@MGBKE@LGALGALGANHCNHCNHCNHCMHCNHCNHCNHCNHCPJD531531753753753753753753753753753964964974974974<:7<:7<:7<:7<:7;86;86;86;86974974974974974853853853853:74:74:74:74:74:74:74:74:74;85;85;85;85;85=:7=:7=:7=:7<97>;8?<9?<9?<9?<9B?B?;8>;8>;8>;8>;8=;8=;8=:8=:8=:8=:8=:8=:7=:7=:7;86:86:85:75975975875865865764764754654654654554432543543443443433433333333333333333444444344334334334333333333333333222222222222222222333333334334344444444444444444444222433433443443543543544654654654754764764865865975975975:85:86;86;96<96;85;85=:7>:7=96=:6>:6>:6?;7?;7@;7@<7A<8A=8B=8C=8C>8D>9D?9GA;GA;HA;HB;KD=LD=LE>ME>LD=NF>OG?OG?PG?QH?QH@RI@RI@SJ@TJAVLBWMCXMCZPE[PE\PE\QF]QF^RF^RFaUHbUHbVIcVIdVIdWIeWJeXJfXJgYJgYKhYKOHBQJCPJCPICOICQKDQJDPJDNHBNHBNHBMHBMGBKF@KF@KE@MGAKE@LGANHCNHCNHCNHCNHCNHCNHCNHCNHC753753753753753753753753753753964964964964964964<97<97<:7<:7<:7;86;86;86;86;86974974974853853853853853974974974974974974974974;85;85;85;85;85<96<96<96<96<96<96?<9?<9?<9?<9@=:B?C@=A>;A>;A>;A>;?<:?<:?<9?<9?<9?<9=;8=:8=:8=:8=:8=:8=:7=:7=:7<:7<:7<:7<97;97975975975875865764764543543543432544432332322111110110222221111111111111111011011001001001001/01/00112012012223123012012012012012012012012223223223233233233233333011111111111111333333333433433443543543544654654654764764865865875975975864974974:75:85964:74<95<96=96=:6>:6>;7?;7@;7@<7A<7A<8@;6A<7A<7B<7B=7A<6D>8D>8C=7C=7D>7F@9G@9H@9LD=ME=ME=NF=NF>OF>PG>RI@SJ@SJ@VLBWMCWMCXNC[PE[PE\QE]QF]RF^RF^RFaUHbUIcVIcVIdWIdWJeWJfXJfXJgYJeWIfWIOHBOHANHANHAPICOIBQJDPJDNHBNHBNHBNHBLF@KF@KF@KF@KE@MGALGANICNHCNHCNHCNHCNHCNHC753753753753753753753753753964964964964964964964<97<97<97<97:85:85:85:85<97;85974974753753853852852964964964964964964964964964:85:85:85:85:85<96<96<96<96<96=:7?<9><9><9@=:@=:@=:A>;C@=A>;B?;?<9?<9?<9?<9><9><9><9=:8=:7<:7<:7<:7<:7<:7<:7<97;97;97:86:86865865764764754654433432221211211111110222100000000000/00011001001001001/00/00/00/00//0./0./0./0./0./0./0./0-./-././0./0./0./0/01/01/01/01/01012122123123123//0/00/00/00/01001001011011111222333
333333433443443543544554654654754764865753754864864964853863963964:74:74<96=96=:6<85=95=95>:5>:6?:6?;6@;6A<7?:5@;6?:4?:5B<6B<6C=7C=7D>7D>7E>8F?8HA:KC;KC;9>;8>;8?=:?=:?<:?<:A>;@>;@>;@=;@=;A?<9><9>;9>;9>;9>;9>;9<:7<:7<97<97<97;97;97;86:86:86986976764754654654544221211211110110100000000../......//////.///00/00//0//0./0./0./0./0./0-/0-.0-.0-.0-.0-./-./,-.+-.+-.+-.+-.+-.+-.-./-./-./-./-./-././1.01-.0-/0./0./0./0./0./0./0//0/00/00001001001011222222333333333433443543544554654543543643653753642643753753853863752852853963;84;85<85=95=95>:5>:6?:6>95=83=84>94>94?:4?:5@:5B<6C=7C=7D=7D>7E>8F?8F?8IA:KC;8>;8?<:?<9?<9?<9?<9@=;@=;><9@=:@=:?=:?=:>;9?=:=;9=;9=;8=;8=;8;97;97;97;97:86:86986976876875865654544543332211110110000000000/00......-..--.--.,-.,-.,-.-././0./0-/0-.0-.0-./+-.+-.+-.+,.+,.+,.+,.+,.)+-)+-)+,*,.*,.*,.*,.*,.*,.*,.*,.,-/,-/+,.*+-+,.+,.+,.+-.+-.-./-./-.0-.0./0./0./0./0//0/00/00001001011222222333333333433322332432432543543432532542642531531641642742752852853963963;85<85=95;84:63;73;73<83=83=84>94>94?94?:5@:5@;5C=7C=7D=7D>7E>8E?8F?8HA:IB:JB:JB:MD<9><9><9@=:?=:>;9>;9>;9?<:?<:=;9=;8=;8=;8=;8=:8=:8;97:86:8698697697687686576576566533232200//////////.//......-..,,---.--.,-.,-.,--+,-+,-+,-+,-*,-+-.+-.+,.+,.+,.*,.*,.*,.*,.*,-*,-*+-(*,(*,(*,(*,(*,(*,(*,(*,(*,(*,)+-)+-(*,(*,')+(*,)*,*,-*,.*,.*,.*,.+,.+,.+,.+-.+-.-./-.0-/0./0./0./0//0/00/0000100111122211122122232233243232132142131032042142153142042052053063164174174185252/741962:62:63;73;73<83=83=84>94>94?:4?:5@:5@;5C=7C=7D=7D>7E>8E?8F?8HA:IA:JB:JB:KC;KC;NE=NF=OF=OF=RI?RI?SI@TJ@TJ@UJ@SI?XMBXMBYNCYNCZNCZOC[OC[PD^RE_RF_RF`SF`SFaTFaTGaTGbTGbUGNG@NG@NG@MF@MF?NHANGANGAOIBMG@MG@MF@LF@LF@LF@NHANHAOICOIC753753753753753752864864864864864864864864864864:75;96;96:75;96;96;96;96:75863863863742742742863863863863863863853853853853974974974:85:85:85:85:85:85;97=:8=:8><9><9>;9>;9>;9?=:?<:=;9=;9=;8=;8><:=:8=:8<:8<:8<:8:86875865764764654654654544543443433110//////.//......-..-..--.+,-+,,++,*+,*+,*+,)+,)*,)*,)*,')*')*'(*()+()+')+')+)+-)+-)+-)+-)+-)+-'
*,'*,'),'),'),'),'),'),'),'),'),&(*&(*%')%')&(+&(+&)+(*,)+-)+-)+-)+-*+-*,-*,.*,.*,.+,.+,.+-.+-.,-.-.0./0./0./0//0/00/01/////0/0000021110011000/10/0/.0//10/10/20/21/31/31042042052053063152041.41/52/52/62/841851951951;73<83=83=84>94>94?:4?:5@:5@;5A;5C=7D=7D>7E>8E?8F?8G?8IA:JB:JB:KC;KC;LC;LD;OF=OF=RH?RI?SI?QH>RH>RH>SI?UK@XMBYNBYNCZNCZOC[OC[OC[PD\PD_RF_RF`SF`SF`SFaTGaTGbTGMF?OG@NG@NG@MF@MF?MF?NGANGANG@MG@MG@MG@MF@LF@PJCPICPIC753753752752752752864864864864864864864864864864:75;96;96;96;96;96;96:75:75863863863642642642863853853853853853853853753753964964964:85:85:85:75:75;96<:8<:8>;9>;9>;9=;9=;9?<:?<:=;8=;8=:8=:8<:8<:8;97<:8;:8;97865764754654654554543543443433333211///......-..-..--.,-.,-.++,*+,*+,*+,)+,)*,)*,)*+(*+'(*'(*&(*&(*&(*&(*')+')+&(+&(+&(*&(*&(*')+%(*%(*%(*&)+&)+&)+&)+&)+&)+&)+&)+%(*%(*$&)$')$')$')$')$')&(*&(*(*-(*-(+-)+-)+-)+-)+-)+-*,-*,.*,.*,.+,.+-.+-.,-.,-.,-/./0-./.././/./////...---,,,-,,---/..//.0/.0//10/10/20/21/31/31042031/31/20.30.31.41.41/52/52/62/63/851951951:62:62;72=84>94>94?:4?:5@:5@;5A;5C=7D=7D>7E>8E?8F?8G?8IA:IB:JB:KC;KC;LC;LD;MD;KC:NEQG>QH>RH>RH>SI?SI?VKAXMBYNCYNCZNCZOC[OC[OC[PD\PD_RF_RF_SF`SF`SFaTGaTGbTGMF?OG@NG@NG@MF@MF?MF?NGALF?LE?OIBMG@OHBOHBPJCPJC752752752752752864864864864864864864864864864864863;96=:8;96;96;96:75:75975863863853853642853853853853753753753753753753964864864:75:75:75:75975;86;86<:7=;9=;9=;9=;8=;8=;8><:><:<:8<:8<:8<:8<:8;98;97:97764543543432432332332322222221333000//0...-..--.--.,-.,-.,,-*+,*+,)+,)*,)*,(*+(*+(*+()+&(*&(*&(*&()%')%')%')%')&(*%(*%(*%(*%'*%'*$&)$&)#&)#&)$'*$'*$'*$'*$'*$'*$'*$'*#&)#&)#&)#&)#&)#&)#&)$&)$&)$&)$&)%(*'*-(*-(*-(*-(*-(+-)+-)+-)+-)+-*,-*,.*,.+,.+-.+-.+,-+,-+,-,-.-./*+++++++,+,,,,,,,,-,,--,---.--..-0//10/10/20/21/10.20.1/-1/-20.20.30.31.41.41/52/52/63/63/52.740840840950951:51<73=83=83>83>93?94?94B<5B<6C<6E>8E?8F?8G?8IA:IB:JB:KC;KC;JA9JB:JB:KB:KC:NEQH>RH>RH>SI?SI?VKAVLAYNCYNCZNCZOC[OC[OC[PD\PD\PD_RF_SF`SF`SFaSFaTGaTGMF?OG@NG@NG@NG?MF?MF?KD>LF?LE?QJCOIBOHBOHB964964752752864864864864864864864864863863863863:7
5=:8=:8;96;96975975975853853853853853853753753753753753753753753752752864864864975975975975975:86<97=;9=;8=;8=:8=:8<:8><9>;9=;9<:8<:8;98;97:97:87875765654544543443433333333333111//0...-..,,-+,-+,,*+,*+,)*+()+()*()*')*')*(*+()+')+')+&(*%')%')%')%')%')$')$')$&)$&)%'*$'*$'*#&)#&(#&(#&(#%(#%("%("%("%("%(!$'!$'"%(!$'!$'!$'!$'!$'"%(#%(#%(#&(#&(#&(#&(#&)#&)&(+'*,'*,'*,'*-(*-(*-(+-)+-)+-)+-)+-*,-)+,)+-*+-*+-*,-(*+()*()*()*))*)**++,+,,,,,,,,-,,--,---.--..-/.-.-,.-,/.,.-+0.-0/-1/-1/-20.20.30.2/-20-30-30-41.41.51.52.62.840840951951:51:61=83=83>83>93?94?:4@:4B<6C<6C=6D=6D=6E>7IA:IB:H@9IA9IA9IA9JB:JB:KB:KC:NERH>SI?SI?SI?VKAVLAYNCYNCZNCZOC[OC[OC[OD\PD\PD_RF_SF`SF`SF`SFaTFNG?NF?OH@OG@NG@NG?MF?MF?KD>NGANG@QJCQJC964964964752864864864864864864863863863863:75:75=:7>;9<:7<:7975974974974853853753753753964964753753753752752752752742742864863975975975975974:86:86=;8=:8=:8<:8<:8<:8<:8=;9=;9=;9<:9:97:87987765765765543443433333333222222222001.//.//../,-.,,-+,-*+,()+()*()*')*'(*'(*&(*&(*&(*%')$&($&($&(#&(#%(#%(#%(#%("%'#&)#&(#&(#%("%("%("%("%("%("%("%("%("%(!$' #& #&!$!$!$!$!$"$"$"$ #& #& #& #&!$'!$'!$'"$'"$'"%'$'*%(+'),'*,'*,'*-(*-(*-(+-)+-(*,(*,(*,)+,'(*')*&()'()'()()*()*()*))*)*******+*++,,,-,,---,,+-,,-,,.-,-,+-,+.,+.-+/-,.,*.,+/-+/-+1/-2/-2/-30-30-31.41.41.52.52.62/63/940951:51:61;61;61=83>93>93?94?:4@:4B<6C<6C=6D=6B<5C<5C<5F>7H@9HA9IA9IA9JB:JB:KB:KC:LC:NESI?SI?SI?VKAVLAYNCYNCZNCZOC[OC[OC[OD\PD\PD\PD_RF]QD]QD^QE^QENG?NF?OH@OG@NG@NG?NF?KD>MF?MF?NG@964964964:85:75864864863863863863863:75:75975;86<:7>;9<:7;86;86;86;86753753753753753753964964864864752752742742642642642853753975974974864:86:86<:8<:8<:8<:8<:8<:8<:8=;9<:9<:8;:8;98:98876876776766554544444444334333122001.//-./-./-./+,-+,-*,-)*,)*+(*+(*+()+')+&(*&(*%')%')%')#&(#%(#%(#%("%'"%'"%'"%'"$'!$'!$'!$' #& #&!$'!$' $' #'!$(!$("%"%"%"%!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$"$"% 
#&!$'!$'$&)&),&),&),&),'*,'*,'*-')+'),'),$&($%'%')$&'%&(%&(%'(&'(&'(&'('(('()(()(()())**++++******,,+,,+++*,++,,+++*,+*,+*-,*-,*.,+.,+/-+/-+0-+0.+1.,1.,2/,2/,3/,51.52.62.62/73/73/951:51:61;61;61<72>93?93?94?:4@:4B<6A;4A;5B;5B<5C<5C<5F>7F?7F?7IA9IA9JB:JB:KB:KC:KC:NE82>83?93A;4A;5B;5B<5C<5C<5F>7F?7F?7G?8IA9JB9JB:KB:KB:KC:NE82>83?93A;5B;5B;5B<5C<5C<5F>7F?7F?7G?8G?8H@8JB:JB:KB:KC:NENF>OG@QIA964:85:75:75:75:75:75863:75:75975974;86;86;86974;86<97:86:85:85964964864864864864742864864863863853753753753753753753864:75975975975:86<:8=;9;98;97:97:87987987876876776766666665555555334333233223012011/01/01.01-./,./,./,-/)+,)*,(*,(*,(*,'*,'),')+&)+&)+&(+%(+%(+$'*$')#&)"%("%("%(!%(!$(!$(!$( #&#&#&"%"%!%### $ $" $ # # # #!%!%!%!%!%!%!%!%!%!%!%!%!%"&"&#&#&#&"%"%"%"%"& # #& #& #& "%!$& #%!#%!#%!#%!$&"$&"$&"$&$&'$&'%&(%&($&'%&'%&''((()*(()''('((((((((((())))))*))**)+*)*)(,+*,+*+*),*),+).,+.-+-,*.,*.,*/-*/-*0-+0-+1.+0-*0-*0-*1.*1.*2.+2.+3/+3/+2.*2.*3.*3/*3/*4/+4/+61,72-94.;50=72>82>82?82A:4C<5F>7F?7F?7G?8G?8H@8H@8JB:KB:KB:KC:NETI>TI>TI>TI?UJ?UJ?RIAOG?NF>NF>:85:75:75:75:75:75:75:75:75974974;86;86;86974974:86:85:85:85964864864864864864864975975863853753753753753753753753753975975975975:86:86986:97:87987987876876776655554665655555555445233233223011/01/01/01./1./1,./,-/+-/*,.(*,(*,(*,'*,'),'),&)+&)+&(+%(+%(+%(+%(+#&)#&)#&)#&)"&)"%)"%)"%)!%)#&#&#&"&"&!% $!%!% # # # # # # ## $ $ $ $ $ $ $!$!$!$!$!%!%!%!%!% $ $ $!%"%"%!$"""!$ #& #& #&!#&!$&!$&#%(#%(#&($&($&(#%'$%'$&'$&(%&(%'(&'(%&'%&'$%&%&&%&&&&'&&'''''''''')))*))**)))(*)(*)(+)(+*)-+*-,*.,+.,+/-+/-+0-+0.+1.,1.,0-+/,)/,*0-*0-*1-*1.*2.*2.+2/+1-*1.*2.*2.*3.*50,51,61,61,61,72-72-72-:4/<60<60=70=71=71>71@93B;4E=6E>6G@8H@8H@8JB:KB:KC:MERH>SH>SH>SI>SI>SI>TI>TI>TI>SJARIAOG?:85:75:75:75:75:75:75;86;86974;86;86;86974974964:85:85:85964864864864864864863863975975975753753753753753753753653643975975975875865976876987986876876776766554444444555455445233223012011/01/01.01./1./1-/0,-/*,.*,-)+-)+-(*,'),'),')+&)+&)+&(+%(+%(+%(+$'+$'*#&)#&)"&)"%)"%)"%)!%)!%( $' 
#' #'#'#'"&"&!%!$!$!$### $## $ $ $ $ $ $ $ $ $ $ $ $ $ $!$ # # # # $ $ $! !" # #!$!$"% #& #&!#&!$&!$&"$&"$'#&(%')$&(%&(%'(%')%&(%&(%'($%&$%&$%&%%&%&&&&&&&'&''''''''&&&'&&''&(''('')(')('+*)*)'*)(,+)-+)-+).,*.,*.,*/-*/-*1/,2/,2/,30-30-2/+3/,3/,2.+2.+1-)1-*2.*2.*2.*3/*3/*51,61,61,61,93.94.94.<60<60<60=70=71=71>71@93@:3A:3A:3A:3D<5D<5F>7D=5E=5IA8KC:LC:LC:LC;MD;MD;MD;MD;ND;ND;LC:JA8LC:OE;OESH>SH>SH>SI>SI>SI>SJA:75:75:75:75:75:75;96;86;86;86;86;86974974964964:85:85864864864864864863863863853975974974964753753753753653643642975754753653643754865765876776766766544444444334333233233223011001/01/01.01./1-/0-/0-.0*,.*,-)+-)+-)+-'),'),&)+&)+&(+%(+%(+%(+%(+$'+$'*$'*"&)"&)"%)"%)!%)!%) $' $'!$( $( $("&* $( $($($(#(#("&"&!%!%!%!%#'"&"&!%!%!%!%!%!%!%!%!%!%!% $ $ $ $ $!$!$ #   !""!!!!$"$"% #& #&!#&!$&"%(#%("$'#%'$&($&($%'$&'$&($%&$%'%&'%&'%&'&''&''''(&&'%%%%&&&&&&&&'&&''&('&'&%'&&''&('&('&)'&)(&*('+*(-+*.,*.,*/,*/-*0-*0-+1.+1.+1.+40-2/,3/,30,40,2/+3/+3/+4/+40+40,50,50,4/+61,61,72,93.94.94.:4/<60<60?82?92?92>71@93@93A:3?81=70?82@92B;3D=5E=5G?7G?7IA9JA9JA9LC:LC;MD;I@8I@8I@8IA8IA8JA8LC:NE;NE;OE;OESH>:75:75:75:75:75;96;86;86<:7<:7;86974964964964:85<97:85864864864863863863853753974974964864864864864653643642642421542653643543543432665655766666544444444334333233223012011/01/01/01./1./1-/0-/0-.0+-/*+-)+-)+-(+-(*-')+&)+&)+&(+%(+%(+%(+$(+$'*%(,%(+$(+#'*#&*"&*"&*"&*!%("%)!%)!%)!%)!%) $($(#(#(#(#("&"&"&"&"&"&"&#'"&#'#'#'#'#'#'"&"&"&"&!%!%!%!%!%!% $ $#!!"""#!"""! 
# #!#!#"%"$'!$&!$&"$&"$'!#&"$&"$&"$&#$&#%&$%&$%'$&'%&'%&'&&'%&&%&&&&&&&'''''''&&&%%%&%%&&%'&%'&%'&&('&('&)'&)(&)(&*('*)',*(-+)-+).+).,)0-+0.+1.+0-*0-*1-*1.*1.*2.+2.+40,51,51-51-40+40,50,51,61,61,83.83.;50;5094.:4/<60<60=70?92A:4@92B;4@93@:3?81?81?81@92@92@92D=5E=5G?7G?7G?7C<4D<4D<4F>6F>6F>6I@8I@8I@8I@8IA8LC9ND;NE;NE;NE;NE;OE;OE;OE;8>;8>;8;85:85:85:85:85864:75:75:75:75975975975975974753964864864864864864864864431421421320432431543432432332443666544444444334333233223/00//0./0/01/01./1./1-/0-/0-.0+-/+-/)+-)+-(+-(*-(*-&)+&)+&(+%(+%(+&),%),%(,%(+%(+$(+$(+#&*"&*"&*#'+"&)!%)!%)!%)!%)!%)$($(#(#(#("&"&"&"&"&"&"&"&!&!&"'"'"'#(#("'"'"'"'!& % $ $ $ $ $ $##""## # #""##""" #!$!$ #&"%"%!$!$ #%!#%!#%"$&!#%!#%"#%"#%#%&$%'$%'$%&$%&$%&%%&%&&$%%%%%%%&&&&&&&&&&''&''&(''('''&&('&('&)'&)(&)(&*('*('+)'+)'-+)-+).+).,)-+(.+(.+(.+(0-*1-*1.*2.*2.+2/+3/+3/+4/+40+40,50,72-72-61,61,61,93.94.=71=71>82@:3>82?82;5/=71>71@92@93@93A:3A:3A:3A:3@92@92@92@92B;4C;4C;4C;4C;4C<4D<4D<4D<4F>6F>6F>6F>6I@8I@8MD;MD;MD;PF=PF=PF=PF=PG=PG=PG=QG=:75<96;96;96=:7=:7>;8>;8<:7<97<97:85:85:85864864:75:75975975975975975974974964:86:85:75864864864642421321320310421321321221332322222221333444334333233223/00//0./0./0-/0./1./1-/0-/0-.0+-/+-/*,.)+-(+-(*-(*-'*-&)+&(+&),&),&),%),%(,%(,%(+$(+$(+$'+#'+#'+#'+"&)!%)!%)!%) $( $($(#(#(#(#("&"&"&"&"&"&!&!&!&!&!&"'"'"'"'"'#(#(#(!& % % % % % % $#"!!!"""""!!!!""" # #!$"$"% " #!$"$ "$ "$!$&!#%!#%"#%"#%!#$"#$"#$#%&#$%#$%#$%$$%$%%%%%%%&&&&&&&&&&'&&''&(''('')(')(')('*)'*)()(&*('*('+)'+)'+)'-+),*(-*(-*(-+(.+(.+(/,)/,)/,)0,)2.+2.+3/+3/+3/+40+40+40,72-72-72.83.83.83.72-94.;50;60<60<60<60=70=71=71?92@92@92@93@:3A:3A:3?81=70=70>70>70@92@92B;4C;4C;4C;4C;4C<4C<4C<4D<4F>6F>6H@7H@7H@7MD;OF;8>;8<:7<97<97<97:85964864864864:75975975975975974974964864:85:85975975642642542321320310210210321221211111110100000000222333233223/00//0./0./0-/0-.0-./,./-/0-.0+-/+-/*,.*,.)+-(+-(*-(*-'*,&(+'),&),&),&),%(,%(,%(+$(+$(+$'+#'+$(,#'+"&*"%)!%)!%) $( $(#'$(#(#(#("&"&"&"&"&"&!&!&!&!&!&!&!&"'"'"'"'"'!&!&!&!&!&!&!&!&$$#""! 
!!    !!!!""!"!$ #!#!#!$!$!$!#!#!#!# "!"!#%!"$!"$ " !" !" !""##"#$##$##$#$$$$$$$$%%%%%%%%%&%%&&%(''((')(')('*)'*)(+)(+)(+*(,*(+)'*(&*(&+(&,*(-*(-+(/,*0-*0-*0-*1.*1.*2.*40,40,40,51-73.73.83.83/84/94/94/94/:5/83.93.94.;50:4.:4/<60<60=60=70=71?92?92D=6B;4B;4@:3A:3?81?81?81?82@92D<5D<5@92D=5D=5E=5E=5E=5E=5E=5E=5E=5E=5E=5F>6F>6H@7LC:LC:LC;LC;OE;8>;8<:7<97<97<97<97964964864864863975975975974974974964864864:75975764642542321320210210210100100211111110100//////...//0122/01/00//0./0./0-/0-.0-./,./,./,-/+-/+-/*-/*,.*,.),.(*-(*-'*-'*,'),&),&),&),%),%(,%(+$(+$(+$(+$'+$(,$(,"&*"&)!%) $( $( $(#'#'#(#(#("&"&"&"&"&"&!&!&!&!&!&!&!&!&!&"'"'!&!&!& % % %!&!&!& % %#""!!!!!        !  !! # #!#!#!$ #!" " "!!!   ! "#"#$!"#!"#!!"!""!""##$#$$$$$$$$%%%%%%%%%&%%&&%'&%'&&''&('&('&)'&)(&*(&*(',*(,*(-+)-+)-+),*(.,)0.+1.+1.+2.+2/+2/,3/,3/,2.+40,40,51,51-51-61-62-83/94/72-72.83.61,71,72-72-94.94.<60>82<60<60=70=71;5/?92@92@92@93@93B;4B;4C<4C<4C<5A:3C<5G@8H@8H@8JB:H@8H@8H@8H@8I@8I@8IA8IA8IA8IA8IA8G?7IA9PF=PF=PG=PG=PG=<96;96=:7>;8>;8<:7<:7<97<97:85:85:8586486386386397597597497496496486486486464264242132110/10/10/0//0/.0/.//./..0/////...---......-..,,---..././0./0./0-.0-./,./,./,-/*,.*,.*+-*,.*,.*,.),.(*-(*-'*,&(+'),&),&),&),%(,%(,%(+$(+$(+$'+$'+$(,#'+#'+"&) $( $( $(#'#'#'"'#("&"&"&"&"&"&"&!&!&!&!&!&!&!&!&!&!&!&!&!& % % % % %$$$!!!""""! !!!!   "!""!!!! !       ! !!"#"###$%$$%$%%#$$$$$$$$%%%%%%%%%&%%&&%'&%'&%''&('&*('*)')(&)(&+*(,*(+)'+)'+)'-+).+).,)0-+2/,1.+1.+2/+2/+3/,3/,30,40,40,40,51-51-61-40+40,50,50,4/+61,61,61,71,72-72-72-94.:4.<60<60>82=60;5/;5/;5/?92?92@92@92@93@93@:3A:3C<4C<4C<4G?8E>6E>6E>6F>6H@8H@8H@8H@8H@8H@8JB:JB:JB:JB:KB:KB:MD;OF=OF=QH?;96;96>;8>;8<:7<:7<97;85:85:85:85:7586386386364297597497496496486464364264242132120/10/10/0//0/.0/.//./../........---------,--***)**))***+++,*+,,-.,-.,-.+-.+,.)+-)+,)*,(*,(*,)+-(+-(*-(*-&)+&)+%'*%(+%(+%(+%(+$'+$'*$'*#'*#'*#&*#&*"&*#'+"&*!$( $( $(#'#'#'#'"'!%"&"&"&"&"&"&"&!&!&!&!&!&!&!&!& % % % % % % % % %""!    !""!!!!""!!!    !!!!          ! ! ! 
!"#$$$%$%%%%%%%%%&&((()))(('((()(((''*)(*)(+*(*('*)'*)(+)(+*(,*(.,*.,*.,*0.,1.,1/,30-41.62/63/73073041-41-51-51-62.62.62.51-51-61-61-50,50,51,51,83.83.83.:5/:50;5094.94.:4.82-<60<60<60=60=70;5/?92?92?92@92@92@92@93@93@93@:3A:3C;4E=6E=6G?8G?8G?8IA9IA9IA9G@8G@8H@8H@8H@8H@8JB:JB:LD;LD;PG>;96>;8=:7=:7<:7;85;85:85:85:85:75:75:7586386364297497496486464364264254232132010/10/00/0/.0/.//.//./.....-,,,,,------+,,***)**)**))*()*)*+*+,*+,+,-*,-+-.*+-)+-)+,)*,(*,(*,(*,'*,(*-(*-&)+&)+%'*$'*%(+%(+%(+$'+$'*$'*#'*#'*#&*"%)!%)!%)!%(!%( #'"&"&"&"&"& $ $ $!%!%!%!%"&"&"&!&!&!&!&!&!& % % % % %$ % %$""!       !!!!"""!        ! " " !" !" !"!!""#$##$""##$$%%%&&&&&&&&&(('(((**)+*)*)(*)()('+*)+*),*).,*.,+/-+/-+/-+0.+0.,1.,1.,2/,2/,2/,30,63/73074084084073/73/73/84/84/72.51-61-62-62-72-72-72.83.83.83.61,72-72-72-94.94.:4.<60<60>82>82>82?82?82C<5A:4?92?92@92@92@92@93B;4B;4B;4F?7F?7F?7F?7G?7G?7KC;IA9IA9KC;KC;IA9IA9KC;KC;KC;KC;ME<=:7=:7=:7;85;85;85:85:85:85:75:75:75:7586385364297475365364264254253232010/10/10/0//0/.//.//./........,,,,,,,,,+,,******)**))*))*()*()*)*+)*+)+,)*,)*,(*+(*+()+)*,(*,(*,(*,'*,'),')+'*-%(*%'*%'*$'*%(+%(+$'+$'*$'*$'*#'*"%)!$(!$(!%)!%(!%( #'"&"&"&"&!%!$ $ $ $!%!%!%!%!%!%!% % % %$$$ % % % %$"!"""!!        !     "!"!" !" !# !#!" !" !" !"!""!""""#""#"#####$$$%%%&&&'&&(((**)+*)+*),+*,+*+*)-+*-,*.,*-+)-+)/-+/-+0.+2/-20-30-30-30.41.41.51.73073095184084095095183/62.72.73.73.83.83/83/94/94/94/61,61,61,50+50+50+50+72-94.:4.<60<60<60>82>82>82?82B<5E>7E>7C<5C<5C<5C<5E>7E>7E>7F>7F>7H@9H@9H@9H@9H@9H@9LDob(pp)) c = mix(c, (vec4)(cos(time*.3f)*0.5f + 0.5f,cos(time*.2f)*.5f+.5f,sin(time*.3f)*.5f+.5f,1.f),.3f); // Final Color const vec4 color = ((c+(vec4)(f))+(1.f-min(pp.y+1.9f,1.f))*(vec4)(1.f,.8f,.7f,1.f))*min(time*.5f,1.f); const vec4 final = 255.f * max(min(color, (vec4)(1.f)), (vec4)(0.f)); dst[get_global_id(0) + get_global_id(1) * w] = pack_fp4(final); } Release_v0.3/kernels/compiler_ribbon_ref.bmp000066400000000000000000006000661223142177000213570ustar00rootroot00000000000000BM66(!" 
" " # # # # # " " """"-4     )2"         &/'0%- '        @O##&'+&*.)*/*!#,0, #-2. #.30)9.,=1+;/+;/+0,=/,=/,=/,=/,=/,=/,=/,=/,=/->0*93*93"## # # #!$!$!$!$"$"$"$" $" $# %# %# %# %# %# %# %# %# %# %# %# %# %# %# %" %" $"$"$"$!$!$!$ # # ##"""!-5##'#!%!                (0(2KZ)" 5?'1$,                                        %!!!"!     "&"!!% ## !%"!%!%$(     """!)/" !!!" " " # # # # # # " " """"+5     7@"!         '#+#+")$         &)#(+&)-'"%+/*!$,0,!$+1,=/,=/,=/,=/,=/,=/,=/,=/,=/BZ8*93*93"### # # #!$!$!$!$"$"$" $" $" %" %# %# %# %# %# %# %# %# %# %# %# %# %" %" %" $" $"$!$!$!$ $ # ###""!*.+$(%"&"!%!               =L ">M(11:                                        ")/!  !$    "&"%!%!%" $  $!$!$"%#'    )."( &!""!"  !!!" " " # # # # # # " " """"$+      "! )."     &#*#+")&         &)#'+%#&"%*/*!$!$,1-+/,>/,>/,>/,>/,>/&5-*94""### # $ $!$!$!$!$! $" $" %" %" %" %" %" %# %# %# %# %# %# %" %" %" %" %" %" %"$!$!$!$ $ $ # ###"" $$(%"&"!%!            0,>0,>0,>0,>0,>0,>0,>0,>0.?0*94+94"#### # $ $!$!$!$!$! $" %" %" %" %" %" %" %" %" %" %" %" %" %" %" %" %" %! %!$!$!$!$ $ $ ####"" $!%*&$($!%!#'"          %.;I"IX',7%.                                       )2#  ! #     !%!$!$!% $" #!$!%"%#'    '-'-#!!)/""     !!!!"" " # # # # # # # # " """"!$          !      !(#*"* (         '/$'!)-("%"%+0+,=/,=/,=/,=/,=/,=/->0,>0,>0,>0,>0,>0,>0,>0,>0,>0,>0->0'6/+94###### $ $ $ $!$! $! $! %! %" %" %" %" %" %" %" %" %" %" %" %" %" %" %! %! %! %!$!$ $ $ $####"!!%*'*.*!%!$(#          *4"";J&.                                      ")/!!!    "&"&!%"%#" $!$!$!%!%#'     $!!""!""     !!!!!!!"" " # # # # # # # # " " """ "*3        (.        ("*"*!(       '/%("(,').)*.*+/+,=/,=/,=/,=/,=/,=/-?1,>0,>0,>0,>0->0->0->0->0->0->0->0+94+:4#####$ $ $ $ $! $! %! %! %! %! %" %" %" %" %" %" %" %" %" %" %! %! %! %! %! %!$ $ $ $$###"" ,1-&*'+/,!% %)$ !     !0->0->0->0->0->0->0->0->0->0->0)71+:4#####$$ $ $ $ $! %! %! %! %! %! %! %! %" %" %" %" %" %! %! %! %! %! %! %! % $ $ $ $$###""# (-)+0-+/-!% %)$!     
!!!>M                                        #$!!"     !%"%"%!$ !$ $!%!%!%"&    "$)  !")/"$         """!!!!!""" " # # # # # # # # " " """" &                !)"*!)      "% &*$%("$' #& ,=0,=0,=0,=0,=0.?1->0->0->0->0->0->0->0->0->0->0->0->0+:4+:4####$$$ $ $ % % %! %! %! %! %! %! %! %! %! %! %! %! %! %! %! %! %! % % $ $ $$$###"" %)&+/,+/-!$ %)$      "!                                        ! !!"    !!&"&"%"%"& # $ "%!$"!$"&        !!&" '           " """"!!""" " " # # # # # # # # # " " """$+                 !)")") ''.  "% &*%(,')-(,=0,=0,=0,>0->0.?1->0->0->0->0->0->0->0->0->0->02D4*93+:4"###$$$$ $ % % % %! %! %! %! %! %! %! %! %! %! %! %! %! %! %! % % % $ $$$$###"")-)%)&+/,+/,!$ %)$      !!                                      !!"$     $"%" ##& $ !!"%!%"&"& "  &+ #""!&-              ,2 # """""""" " # # # # # # # # # # " " """!$                !(")")!(!(     "%.8'*$%)",>0,>0,>0,>0,>0.?1->0->0->0->0->0->0->0->0->0->0->0.@0+:4+:4###$$$$$ % % % % % %! %! %! %! %! %! %! %! %! %! %! % % % % % %$$$$$###!"'#%)&+/,+/,!$ %(#   !                                       %!(.#   !%"%"&!$ $ "&# #"&     !$$!!")/"6@!                ! # """""" " " # # # # # # # # # # " " """ "                 &!("*"*!(!(      (0"&",>0,>0,>0->0.?1.?1->0->0->0->0->0->0-?0-?0-?0-?05H6+:4+:4$##$$$$$% % % % % % % %! %! %! %! %! %! %! % % % % % % % %%$$$$$##!%)%(,)*/+"&"#&!%(#     !!!"                                        !"$    !%"&"%"%#& "!!%"%!%"% $  "  $ #"!"(2                      " # " " " " " " " # # # # # # # # # # " " """"K^"                '-")"*")!)      "&!->0->0->0->0.@1->0->0-?0-?0-?0-?0-?0-?0-?0-?0-?0.@0+:46B'##$$$$$% % % % % % % % % & & & & & & & % % % % % % % %%$$$$#!).*%)&%)%*.+$($ $$(#$("  %-    "! !                                      
(.!"&     "& #!$#& # "&"#"&      !%(-!(.!!#7@"                        \o1 # # # " " " " # # # # # # # # # # # " " " """&-                !(")"*")")      !->0->0->0->0.?1.?1-?0-?0-?0-?0-?0-?0-?0-?0-?0-?05H6+:4+:4$#$$$$$ % % % % % % % % & & & & & & & & & & & & & & % % %%%$$#")-)+/*$(%%)&$($ $ # # $"!!!$+     ! !                                      !# !   EV!%"&#&"& "&"%!%"%   " &+$)!#)/",6                  )2         # # # # # # # # # # # # # # # # # # " " """&                 !(")"*"*"*      ->0->0->0-?0.@1-?0-?0-?0-?0-?0-?0-?0-?0-?0-?0-?00C1+:4BM4#$$$$$ % % % % % & & & & !& !& !& !& !& !& !& !& !& !& !& & & & % % %%$$#"  &-!$($#'#!% ##""")20:  ! !!                                     !%    $,HZ!!%"%"%#& #&"%!%!$     #'  #"$(1                    "$7@"         " # # # # # # # # # # # # # # # # # " " """ '               #+"*"*"*"*       ->0->0->0.@1.?1-?0-?0-?0-?0-?0-?0-?0-?0-?0-?00C1+:4,:5$$$$$% % % % &!& !& !& !& !& !' !' !' !' !' !' !' !' !' !& !&!& & & & % % %%$$#*/))-((,'(,'(1!$ $ #*3%-=G- ! " !!!                                      " '   )2N`&"%"&#&#& #&"&"%"%!  " # !#)/$(2                      !"(2         +5 # # # # # # # # # #!# # # # # # # " " """$+                "*")#*#*#*      ->0->0-?0.?1/@2-?0-?0-?0-?0-?0-?0-?0-?0-?0-?0-?01D2,:4##$$ % % % & &!&!&!& !' !' !' !' !' !' !' !' !' !' !' !' !'!&!&!&!& & & % %%%$#!)-'(-(#(1(0)11; !!   !!!                                     #   2; %"&#&#'  #& "&"%""    $("(#"!"(1                        !!")3        % # # # # # # # #!#!# # # # # # # " " " ""!)                 "*#+#*#+    !-?0-?0-?0/@2/@1-?0-?0-?0.?0.?0.?0.?0.?0.?0.?06H7,:4,:5$$% % % & &!&!&!&!'!' !' !' !' !' "' "' "' "' "' !' !' !'!'!'!'!'!'!'!&!& & & %$#!).'"!&!1;!0; =G.      !!!                                       !)2<!"%"%!%#& #&"% #"%#    #  !!!')0                           (.")/$          & # # # # # # #!#!#!# # # # # # " " " " "#+               "*#*#+08     -?0-?0.?1/A2.@1.?0.?0.?0.?0.?0.?0.?0.?0.?0.?00B1,;5,;5% % % & &!&!&!&!'!'!'!' 
"' "' "' "( "( "( "( "( "( "( "( "("("("("(!'!'!'!' & &%$#"!!!"!  ""!!!                                        $,.8!%"&"%#&!$ #&"%"%"%"%!%   !&  ")/)/.7                               !!)2         # # # # # #!#!#!#!# # # # # # # " " " "'.                #+#+#+      -?0-?0-?0.@1/A2.@1.?0.?0.?0.?0.?0.?0.?0.?0.?0/B1,:5,;5 % % % & &!&!'!'!'!'"'"'"( #) #) #) #) #) #) #) #) #) #) #)3=&AL4AK4AK42<%2<%@J3@J21;#3>&6@(%$$##""! " "!!                                        %.6?"$' #"%#&#"#&" $ $!%   !$#)! ""$5?!                                   !!"         J\! # # # #!#!#!#!#!# # # # # # # " " " "%,                 #+#+        -?0-?0.?0/A2/A2.?0.?0.?0.?0.?0.?0.?0.?0.@0.@0/A1,;5,;5 % & &!&!'!'"("("("( #) #) #) #) #) #) 6@)#) 3>&BL54>&4>&4>&AL4AK4AK4AK4@K3@J32=$2=$2<$?I11;#1;">H02<$/9!##"!!!$    ! "!"                                      !('06>##&#&"&"%#&#&"%"%"% $    % !!$                 $                    !#        %!# #!#!#!#!#!#!# # # # # # # " " " "'.                           .?0.?0/@10A2/@1.?0.?0.?0.@0.@0.@0.@0.@0.@07I:,:4,;5 & &!&!'"("("(#)#)#)#) :D,5@(4>&4>'BM54?'BL5BL54?'4?&4?&BL5AL4BL4AL4AL4AK3AK3@J3@J2?J2?I1?I1?I11;"0:!=G//9 /8 .7"!  $ #!$!!!                                       #+ 08!%!%"%"&#&!#&#&!%    !   "#"'1               $                      (.).!          #*!#!#!#!#!#!#!# # # # # # # " " " "!)                          /A1.?0.?0/A2/A2.@1.@0.@0.@0.@0.@0.@0.@0.@0.@0.@1,;5,;5!'!'"("(9C+4>&3>&3>&4>&BM5BL54?'4?'4?'4?'BL5BL5BM5BM5AK44?&5@&5?&4?&4?&4?%4?%4>%3>$3>$2<#1<#1;"1;">H0>H/=G//: =F..8.7!  $     "!                                     $-1:!%!%!$!"%"%#&"%#&"% -7      !!$*1                #*"                          !"          
'!#!#!#!#!#!# # # # # # # # " " "%,                          .?0.?0/@10A2/A2.@1.@1.@1.@1.@1.@1.@1.@1.@1.@1+:3,;5"'"(2=$3=%3>%4>&4?&BL44?&BL5BL5BM5BM54?&4?&5@'5@'5@'5@'5@&5@&4?&3>%BL4BL43=%2=$@J2@J2?I26@'5@'5?&3=$1<"1;!0;!/9 %AL43>%4>&3>&4?&5@&5@&5@'7B)AL4AL4BL5BM5BM5BM5BM4BL4BL4BL4AL4AL3AK3AK3AK3@K2@J2@J1@J1?J1>H0=G/=G/3=$0: /:;E,-7-6,6   #"" #                                     #+(15>!"%" #"!$""% ##'7@#2<    "%!).!+2               )/)/",5                         "! !          q9!#!#!#!#!# # # # # # # # " " "&                          .?1/@10B30A2.@1.@1.@1.@1.@1.@1.@1.@1.@1.@1(7/,;53=$AK33>%3>%AK3AK4AK4AL4AL4=H/BM5BM5BM5BM5BM5BM5BM5BM5BM4BM44?&CM44?%BM43>%3>$3>$3=$2=#2=#2=#2<"1<">I/>H/>H.$@K33=%3>%BL4BM4BM4BM4BM45@&5?&5@&4?&4?&4?&4?&4?&4?&4?&5?&9D+9D*9D+7B)4?&3=$5@&2=#2<#2<"1<"1%4>%3>$3>$5@&2=#5?%6@&5?&5@&2<"0;!0: 0: /:=F-$4>%4>%4?%4?%:E+:E+4?&4?&4?&5@&5@&5@&5@&CN4CN4CN4CM44?%4?%BM34?%BM33>$3>#3=#2=#2<"2<"1;!5?%5@%5?%1;!.8.8;E+-7+5+4  #&"% # "%             !                       3$AL2AL2AL12="2<"2$4>$4?%4?%5@%5@%5@&5@&5@&5@&5@&5A&5A&5A&AM2@L1AL1BM2@K1@K18C)=H-;F+5@%7B'3>#3>#2=">I.?J/1#1B31B3/A2/@1/@1/@1/@1/@1/@1/@1/A1/A1/A14G3-<54?$5@%7B'BM2"#.8-6,5+4*3   "%!$                                   *4! !!$6?"&/'.   !                                        "!""# '   (1!!            !%-%- '                  *1&&-%GX                            "+4-6-7-7.81C30B2/@1/@1/@1/@1/A1/A1/A1/A1/A1/A1/A1,;5-<5?J.9D)"3>!2< 1< 0;/:/9.8-73="6?%*3  #& $#&#&               #                 +5"  " $,6%-                                            "!#"(*0)/    #Pb(   (2!).!      )2      #*#+ &                    /8                              )2+5,6-7.8.80B2/@1/@1/@1/@1/A1/A1/A1/A1/A1/A1/A1/A1-<5-<66A%6B%7B&7C&7C&?J.8D'@L/9F(:F(:F(;E0!2= 1<0:=H,=G,,6+54=#*1 !  $(#& #'                                =L!   4= 09#,                                            "!!!" $    $%)!%    5?!!).).      
'0 "    $,&.")                                                (18B':D(-7.7=F+0B2/@1/@1/A1/A1/A1/A1/A1/A1/A1/A1/A1/A1;N<-<67B%9E&9E'+19'1:'1:'09&7?,7@-9A.2;(9B.7@,4=(4<(6?*:C-:D-9E$6B"5A"3> 2= 7A%0:3=!2*3;(09&.7$/8%09&19&2;(3<)5>+6?,:C0:B0:C08A.8A-/7$.6#/8$-5"/7$2:&08$3<'4=(2:%7@*8A+3> ?J.%9B':D(H+/A1/A1/A1/A1/A1/A1/A1/A1/A1/A1/A1/A1/A1-<6-<69A.9B/7@-7?-3;)/7%.7$,5",5",5",5",5",5",4!,4!+4!+4!2;'6?,6?+8A-09%.6"*2+3-5!6?)5=(08#3;&4<'2:%(/!'%##,7$(#'#                                  " "+/8%.7$-6#-6#,5!,4!,4!+4 +3 +3 *3*2/7#.6"4<(2:&/7#*2)1*2(/19%")&-&$!(/:@L'4@,6                                    )3                                         ").#!     !&*$( $ $  18!/6 .5             !).',$     ")&/+3 /8&.                                            '0)29B'-7/90:1<2=4? /A1/A1/A1/A1/A1/A1/A1/A1/A1/A1/A14F6-<6-<64=*7@-7@-8@-4=*1:'/8%,5",5",5",5",5",5",5"19&3;(6>+5=*1:&,5!+4 *2)2)1(1(0'/'/19%%,$+$+!( &%*3?J%>J$.9      &/%.%.                         !DS!(2                                         "(.# "!$   ##'!%"!!'-306 .4                   ).!    %#+&.#+    ;I$,                                                (19B&:D'/:2=2=5A5@ :G#/A1/A1/A1/A1/A1/A1/A1/A1/A1/A1/A10A1-<6-<6,5",5"-5"-6"-6#-6#-6#-6#-6#-6#-6"-6"-5",5",5!,5!+4 +4 +3,5!.7#3<(19%08$(0(0'/&.%-)0#*!( ' '3?8D8D,6   %-&.2; &/&.'/&/                     !  ?O&/                                        "!'# #( #   !%!% &,07!&                      !     '%-(0)1    8A")2                                            ,6*3&*3-60;J\'K](L_)Ug209#0B2/A1/A1/A1/A1/A1/A1/A1/A1/A1/A22D46H9-<6-5"-6"-6#-6#1:'3;(3<(3<)3<(2;'1:&/8$.7#/8$-6",5!,5!,5 +4 +3+3*2)2)1(0,4+3-5 %-$,#*.6!!("*Ob#CO(1<&.  &.%.4M                                       #!'# (-  "&!# .4 %%                                 !)&.      =L%.                                           *3# "$,,61<K\&M_(Pc*2:$*119%/A1/A1/A1/A1/A1/A1/A1/A1/A1/A12C44F6-=6.=63<)/8%.7#.7#.7#.7#.7#.7#.7#.7#.7#.7#.7"-6"-6"-6"09%/8#-6!+4+3*3)2)1(0'/&.&.%,)1"*")#+s:f{7.9! 
)2-5&.&/'0&/4=!5>!&/5>"'/                  !                                         "#" %*  !%#'"&# # *0%&                                      #+'0     ;I                                        )2&.$"%$,6CM_'i32:$)119%'/+3/A2/A2/A2/A1/A1/A1/A1/A20B22D44F6/A1.=6.=6.8#/8$/8$/8$/8$/8$/8$/8$/8$/8#/8#.8#.7#.7"-7"-6!,5!,5 +4+4.6!-5 *2)1+4'/+3%-2:$#+ #-5)0d},$,!  &2:/8&/'0'0(1'05>!)2'0'0'0                ! !                                     "! !#   "&!%!%  -4+2%                                       $,1:                                        '0&/%.   !#+ '$%!'%,-5.6 '/)1'/3;&+3(0/A2/A2/A2/A2/A2/A2/A20A22D44F65G78J9.=6/8$/9$1:&2<'3<'2;'1;&1;&2;&2;&1;&1;%/8#/8#.8".7".7!-6!,5 ,5+4*3*2)1(1(0'/4<&&.'.#+*1.5DU&!$.64< 19'0'0(1)2'0(15>!'0'05>"5>!               !                                       !(-!     !$"!% #"-4,3         -4                              "*&/>M                                       (1'05="2:!%.&"* '%,2"*%,-4&-(0/7"'/(0)1)1/A2/A2/A2/A2/A2/A20B23E55G75G73E5-=6.=60:%0:%1:%1;%1;%1;%1;%1;%1:%1:%0:$09$09#/9#/8"/8".7!-7 -6 ,5+4+4*3)2)1(0(14<%%-$,-4/5&/%"#)084=!2;6?"*46?"'1)2(1(15>"'0'05>"5>!                                                     !(.      "&"%"!$&-+2          &.518!29                                 #+9G8B#                                    (1(1'04="&/%. #!(!( '/5 ")$,%-,3&.'/(0+3.6")2*3/A2/A2/A2/A20A20B33E55G75G73E50B2.=6.=62<&2<&3=&3=&3=&3=&3=&3=&2<&2<%2<%1;$1;$0:#/9#/8".8!.7 -6 ,5,5+4*3*3(1(0(0+3,407 /6 "*%#$+087@#4=(2.87@"6@"(1(1(16?"6?"'0(1'0'0            !""                                     )/! "(    "&"% #$&         ,3/6 07 2:  !                                  $,"&/#"&")")!(#*$,%-&-'.,4+3(18@+2:%+39J;/A2/A20A20A20B23E55G75G73E51C31D2.=63='4>'4>'4?'4?(4?'5?'5?'5@(8B*4>&4>&3=%2<$1;$1;#09"/9!.8 -7 -6,5+4*3*2)2(0(1.607 .6")%,2#*2:*45? )309)37@"(2(2(1(16?"6?"'0+3(1'0         !                                    !!!!   ""&"% $"*0$'          &&/6 07!  
""&                                    '/                              6?#6?#7@#)27@#)26?"&.#%!)"*#*#+$,&-)1'/'/7?))1*2,5 /8",5 0B2/A20A20A22D44F65G74F61C30B20B2/@4.=6DS.ET/GV1JY3KZ5ET/ET/ET.DS.KY4M\6HV1CQ,HW14>%3>$1;#0:"0:!.8 .7-6,5,5-6*39B'3;"+2/6!)&+2")2: *4+5*4*4*4*3)3)2(2(2(16?"5>"'0&/4"'0&.&/     !  " #!                                   )/!#   # #"%#&+1,3         ,3,3%-4    $"&&*(-                                                                      7@#)27@#)3*3+4+5,6#*%/5 #+$,'0'0'0(0*2)1)2*3+4,5-6 .7!3<&/9#0A20A22D44F75G74F61C30B20B20B20B2.=6TeM#CQ&@O#3;"/63:!"*!( ' '!)08;IL\(DTDT.9.8,6*4*3)2)2(2'1&/%.&.   "%"%"! "                                   !&   GY $ $#'! ',2          ,3$%+  "!%#& %',      "*                                                                  6?#(1(26?!)3*4,6FW GX!FW !(!($,%-.7;H;I+4*3.7.7 +4,5-6 .8!09"1:#:D,9C+4?&4F75G74F61C30B20B20B20B20B20B2/>6/>6LXKKXKKXKduLBL:BM:CN9DO9]oCby +4-8GX!J\"bz,&.!($,9G.6Rg&>LDR(+4+4,5-6.7.8 6@(8B*1;$3=%5@'ET.GV/5G73E50B20B20B20B20B20B20B20B2/?6FC?I6CM8\lBd{'/8!0:"2<#3>%DR,FU.HW0JZ1FP93E51B30B20B20B20B20B20B21C21D3/?6P^Q5?-?I6CM7tBmD`v8]s6]r6Zo3Yn1]r5Wl/Xm.Zo.3;""*07.7$,#+!(!( '!(&-@N!M\+Ma#GY K\%Qb,/:+5)32<'0*2 !5?"&!%  "!$#&                                     ,6$#&#&%.)0        ,2*0)0!!!% $##'     !&,"%*0""!    !!)/                                                             !(1+409*3+4,3+2/6 (/#*)0!(")$,9F/719g0>M!=J"-6-7.8/9 1:";E,AK35?'FU.O_6GR;EP:BL81D30B20B20B20B20B20B21B21C21C2/A2BY:5?-?I5CM6PwDax8^t5]s5^s5Zp1[q0j42;""**16B#+"*!)!(!( ''.!(")BQ"Rf%FW DU2>,76@!7@#+3 &*41=#'!  #"%#&                                   $+'009"%"%+5*1          '%,2 !%!%"&#'      #"(!'-)/")/!         %! !                                                    &/5>"(2(2)2(1(109(1(1,5#)#)$*(.'.)/$+ ' '+2")$,:G19 18 g0@N">L"7@&4=$=F.@I16@(2<$4?&FQ8Wg?EP9DN9CM;=D;Q]R0B20B20B20B20B20B21C21C21C21C3D[;CZ:4>+?I4vM{HvCp=o;n:m8s=5>%$+,39F%-#+")!)!(!( '!(!(!((/GV'JY)HV)/:*409")!#1=%* #! 
#"%#&                                   .8!$4>)0         +1$)/ $!%""&#    #'*0!"!)/!        FW     %!!                                                 %-3;"6?")27@#)2)26?"3<)2+4#)$$$%%&!(*1,3!(#*$,:G2: 19!6?%JY+?M#.7.8/9 1:"@J1DN69D,DO8DN9CM9@J8WcX[h]0B20B20B20B20B21B21C24F56H81D3DZ;p=~Jn9l7h~=&.$,;JAN%$,"*")!)!(!( ' '&-!(")?M 8E3?08&-*0#/: "&" #"&"&                                &05>"!$"")1         %'-$* !%"&#'    ""&%*  )/"$"           "&/    #!                                              +4  &.'0(1*3*3,6)23<)2(.%$$ &(.,2*1"( & '!(")$,;H2: 18"5=%")2*4*4*4*4-6#)$$$+*0!(%&& '!(")"*$,;I3;"07!4;$9B)=G-3<$5>&6?'@I2BL5AK5@J5@J6:D3R_T1C31C31C31C31C31C33E56H77I9F]=9C/7A,G/?I1@J30;$"#++4.8/:.9.8'.%(.%%&&(/!( '.5")#*)1;I2:"17"5<&9B*7@):C-;D.>G1AJ5AK6AK8EN>\i_1C31C31C31C31C31C31C31C31C3/@2F]<4>,1;'3<'2;%-619#'.JY+G1?H3AJ6@I6?H8Xd[̲ͧ1D31D32D32D32D32D32D32D32D3E\;4=+5>*4=(2:%,4r:=L9F'0$,#+#*"*"*")")")"*#*$,07 /5 -3!-7 &.%.                                  '.&/"%#&Rd+    #$  (.!)/                           %! " # # #!#!$!$!$!$!# #"  -6%.                                      .9 !&!")+3!( '!(+2+2!'& &'. & ' '!(&-!(07 #*$,&.9A&18#4;&8@*8@*):C.=F1?G3?H5@I7^ja|Ĥťɭӹ6H75G63E42D32D32D32E32E31B3Ja@/8%,4!")e-=L9G8E&.%-%-%-&.8E9F6>#.6!)/6!)&-)/&/'15>"'0'0                    "        #"%08'1"*  !$$!!!)/                                       /6 ,2&-!" # #!$!$!$!$!$!$!$ # "                                  (04<"$!!'+2!'%+'.)1%-#+&-!(!(!(&-!(07 ")&.#*$+,3+3,429"4;'8@,G2?H5BK:^j`z{ԣŤɦͫ5G65H75G63E42E42E42E42E4Ia?ҵگ2;(,4!+3Vk';J9F8E'/&.&.9F:H;I18 +2.5$,#*$08(1+56@"(1(1                   #!        !!%3</8!)  #,$!!                                    )/"   GX     3:!-4 ! " #!#!$!$!$!$!$!$!$ #!                               
6?#'0'0"*# ''/ '%,)1:H(1$,#*")")")")"*%-#*$+*2%-%-3;"28$7?*;C.=E1?H5AJ9Zf\amf|züңţĄ̇еD[;2E44F54G64F53E43E43E43E45H5دյڥȻ֒4<+!',5Rf%;I9F8E8E8F9GF3@I6DM>\h_vvyøҧšƱϲѨг3E43E43E43E43E43E43E43F4ҷݳأƸԕ5=-*1&.M&.$,$+$+(/'.$,&.%-19 19!28$:A-=E2>G4CL<]i`bnfsz}}ö϶ҧ̯ϩѩѴ3E43E43E43E43E43E43E43E44F4ڶܯԣŸӕs"#+%-&/&/;IRf%t<%,")6?$#+!(*29GJY)GY FW -8+5*3)3)2           #&         !8B#@O                                       #""#% &$)      *41;',      !!"!(     "*&/#+ ! " #!$!$!$"$"$"%"%"%!$ "                         *4,6FW 9G!("*(1.5,32:!f.:G%-*2%-$,%,*2&.3;"4<#18!8@,;C0>F4BK9[g^`ldq}wz|̵ϯΡŭ̬ΥͩѴ3F43F43F43F43F43F43F46I7޵ڢDzжЯĝsm~}&,%-$,)1&.*2+2 '!(:G#+!('.%,L[*j5L_"GX -8*4)3)3(2(15>"*23;!          "F3BJ9Zf[]j`bnftwx|ö϶ӲˡƩ˨ͫԧоDZ;3F43F43F43F43F43F43F43F4ۻ߶޸ߴӲץǶӰȟ{wup~")&-$,$,$,$+ &(/;I#+!( ' '")e{4d},b{+GX -8+5)3(26?"5>"&/'/  #'"       "                                      (.#(-%*##' #     '+#'#  8A#2:!/7                     $,(2! #!#!$"$"%"%"%"%"%"%!$"                   *3-7e-+2!(#*7D4=!29"29"5<#6>$%-+3%-%--4+23;#07 :B.=E2@I6DL>\i^_lcangq|uz{±ɶϫȯˠƦʢʦѤΦЭ4F44F44F44F44F44F44F43E5ӸϫͱעƵӷҰƪ›wto}k|yn~{&-%,'.'/#* &+39F#*!( '& &&s;t"&/&/ :E#'      !                                    )/##(.%+'-"&    !$("&!$"  18 07 .5                       &.;J  "!#"$"$"%"%"%"%"%"%"$ #                    "$"('.!(")%-Pd%2:".52:"4<#4<#)1-52:!-43:"2:"29##)2*3*3 "##$!'(/ '!(#+F3AJ8EN>ZeZ\h]_k`andcqghumm{ut||yovwvs}xvw{~ɱʴʹ5G55G55G55G55G55G54F5̰Ȭūxxx|zgvqXe`Yf^'.)0)0(/:H#+!( (&&&%& '#*!(+2&s;*0.7    !#"% #                                !)/!!'"      $"%!%,2+1                                                  "!$"%"%"%#&#&#&#&#&"%"$)0;""'0*3,6,6+5#!"#%(.%%& '#*#*-52: 06"18"2:"+2+318 29"4:%@H6DMF3AJ8GO?T^SYdX[gZ[g[]j^anbbpccqe]l^nhcrik{riypwk|tm~v}~stttƳ}vts~r|pznwl~tk|rx`og`pdpfUbXR_TR_TUaV'-(0$,#*")!(!( ( (!("*(019 /7     #"&#'                              $;G      +5$"% $$&                           !!                                           
"$  #"%$'#&#&$'$'$'/8 & '#*")")")%,-3%%% & ' '!(")#**1,318"18"19!29"/639%@I6DL;JSDVaUYdWXdW[h[_k_]j]^l_]k^\k]^n_bsccrhhwojzrgwoiyrk|um~woyq|r~sttt~~մ껜xuutr}q{oym~vywu}crjm|q{todUcWYfZP\QNZNR^R$+$,#+"*")!)!(!)#+'08A%'.  !%#'$)                                  !)8A#4=!#& ##'%-)0,2.5                                %"(-!                                          #"&$'#&$'$'$'$'(1 '&-$+#*(/!' '& & ' '!(")$+#+$+-518"18"08 /629#@H5CK9FO?MWJVaTUaTZgYYeXXdW\i\bpcXfY\j]Zh[\k^_na_ndftmfuoeungvpizsl|vnxozq|r~stts{v~yz|}uuts~q|pznwl|ujzrgwopxetln}sbrb_o^YgYUcVR_SS`SP]QKWK$+#+#*"*")")#+8FJX+/6KY+   .7%)&+0;                                $-8B$19$("%"%)1*1.4                                  DU      ")$!                                       #"&$'$'$'$'$(1<"*#+9F%-(/&-!'!'!'!(")$+#*$+%,2:!18".6-529#?H4BK8ENL 8E'.&-!(!(")#*#*$+$+.5%-07!-529"?G3AI6CL9FP>ISCPZLT_PWcTMZJQ^NS`QUbSZgY_m_TbTVcVWeX^m`]k_[j^\j`_nc]kedrm|fupixsk|vn~xozq|r~stttsqouvw|twxutsr}p{oym~wjzthxqgwor{jyqn|t^mb\k_[j\YgZWeWUbUS`SQ^PO\NNZLLYJJVH )1%-#+#+$,>MGU( &  >I'*1.44@                              '07@!2; #"!ES(                                         $+(..4   ,4&/")                                      .8-7$%+5$($'%(%(ET$'/=K=K$,#*")")#*$+$,%-%-18 06"29"=F2G4AJ7DNM$,#+#+$+$,%,,4+3)129":C.9B.G3BK7DN:DN:@J6AK8AK8BM:EQ=GS@HSBHTDJUFKWH\hYVbSO\MQ^OS`RSaRUbT[i[WeW]k^Yg[Zh]Zh^Zh`hygzpcqlcqll~pl}qetphwsk{um~woyp{q|r}s~ttuuuuuutts~r|q{pyn~wl|uiysgvqqsr{pxlzsli\j`[i^Zi\YgZXfYVdWUcUSaSR_QQ]OO\MNZKLXIKWGIUEITCIUAhxUAL9(."**1%,(/$) '0708&$19!&/'0                     !                                           -4+2,3.5.508 19 !$!$!$!$!$!$ #!$!$!$!# #!  
,4=L                              &/%-#%"*(/%,BP"&-&*/5"18"3;#.6.6+3)1&..60818#@J4AK4?I3;E/;E/;E1DN9>I4>I5@J7AL9FQ?DO=DO?FPAGSDITFLWILXJNZK^k\Q^OTaRVcUUbTerdWeWXfYYg[Zh\Zh^Zg_Zgaeugcqldrmbpliyocqnetphxsk{um}wnxpzq{r|r|s}s}s}s|r|q{qzoynwl|vjzthwrrspypxn|ui{h\ja[i^Zi]Yh[ftfWeWVcUTbSS`QQ^PP\N\hZLYJKWHIUFHSDFRAEQ?DOCN@EPBMZGJUGKWIMYKO\MP]NR_PboaUbTVcUZgYXfXYgZZh\Zh]Zh_Zg`[hbdtgbojbplgurfunanmcrofurixsk{ul|vm~wn~wnxnxn~wm~wl|vk{tixsgvqetootr{qykzszp^mc\j`[i^[i\Zh[YgYWeWVdUTbSS`RQ^PP\NN[LLYJKVHITFGRCEPAFQACN=AL:?J8>H6=G4;E2>H5#*+2 %-DR&"*(//6-8*4)3                 #!                                    ,3+2-4,2.5,306!!' "!# #!%"&$(!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$"$"$!$!$!$ #"                             )3*3,6!(!(%-&.08 18#.4!06"18!19 &.'/(0'/>H1Wh:Vg9]nAaqE=H/H3@J6MXE?J7@J9@K:@J;@J=AK?CMAEPCNZHJVHLXJP\NP\NQ^OVcTTaSWeVWeVXfXYgYZh[[i][i^[h_Zh`Zgb`of_li]kgdrodqp[hj\ik^kmbpndspfuqtespjxv`nm_mliyoq{pyp~xiziyx\ja\j_\j^[i\ZhZYgY\j\VdUUbSSaRR_PP]NNZLLXJJVGHSEFPCEPBBM>AL<@K:?I8>H6=G5L2:".4.4!/5 08/8/7(0,4Xi;Vg9Uf9Sd8Vf:?K/>H/;F.;E.;E/?I3?I4H4>H5?I6?I8UaKFR=?H;?I=AK?CLAEPDHTFKWIMYKO[MQ]NR_PTaRUcTWdUXfWYgYZhZ[i\[j]\j^\i_[h`ZgaXebWcb^jj^jjXde\hjXcf_iqfqvhtvgtsjwukxufvi_me\jb\ja\j`\j_\j][i\ZhZYgXcqbWdUUcSan_R_OP\MNZKLXIIUGGRDEOBDN@BL=@J;@J:?I8>H6=G4duF=G-H5?I7?I8EP<>H:>H<@I>BK@CMBFPDITGKWINZKP\MR^OSaQUbSVdTXfVYgXZhY[i[\j\\j]]k^]k_\j`\ia^lbivpWcaT``VabU``XdbYea^le^lc]ka]k`]l_]k^\k\\j[`n_YhXXfV]jZbp`S`QQ^OO\MMYJKVHHTFFQCFPAbpVAKH7>G5=G4FP=;E1:D09C/:C/:C/7A,7@,7@+7@+Zn,@M"!( '&!(KZ*FW CT+5)3'0'0*2    ##'"%!                               ,3%*0$+2! ! 
$!$!$""&"&$)#' "         #!#!#!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$"$"$"$"%"%"%"%#%"%"%!$!$""$                    /:/6,3!(")$,f.29!17"/709)2dwBXiH4>H6?I7EQ;FQ<>H:?HH6=G5G2:C.:C.$(                             %,2,3(.)/(.!%"% $# # $ "&!%"&!%               #!#!#!$!$!$!#!$!$!$!$!$!$!$!$!$!$!$!$"$"$"$"$"$"$"$"%"%"%"%"%"%"%#&#&"%"%"%"               %$ & & '")$,?N/7(,19kGcwAWh;Rb7P_5L[3IW1ER/?J,;F*:D)9B)7A(7@(7@)7@)7@*7A*8A+8A,9B,9C-;E0=G2H4>H6DNH5=G4)5>(5=(4='4='4=' %-") '&%% ':H7D0907!' &*4@L'',#'                          -3-4+1*1'. #&"&"%!!%!%"&"& $                 #&/&/" " " # # #!#!#!$!$!$!$!$!$!$!$!$!$"$"$"$"$"$"%"%"%"%"%"%"%"%"%"%"%"%#%#&#&#&$&"% # "0;&/'/(18A#+4  (.##$%(/ '")$,*207 \kFjEdxAVf9P`5LZ2HU0DP-@L+=H):D'8B&6?&5>%5>%5=%5=&8A*5>'5>'6?(6?)7@*8A+;E/EN8:C.;D/H4=G3)4=(4<'3;&2;%2:$5='2:$2;$#+!( '&&&$+'.&.&#) 'ER),1%*!                           .4+2)0&.+3"#&"% #"%  $! $!%"          "!##(%+%% '''!)!)!'% !!" " " # "!#!#!$!$!$!$!$!$!$!$"$"$"%"%"%"%"%"%"%"%"%"%"%"%"%"%"%#%#&#&#&$'#&#&"%" 5? 1=%"$&-*3.8GX!d}-t<$#$(.% '!(#**2.5pJh}Ddx@Ue8L[3HU/EQ-AM*=I(;E&8B$5>#3<"2:!19!19!19!19"19#2:#3;$3<%4=&5='6>(6?)7@*8A+9B,HQ<;D/@I4=G1>H3?I4?I6@J7@K8AK:AK;AK=SaGMZDcuNHSEJVGMYIP\LR_NTbOVdQXfRZhT[jU\lVm]m]]lV[jUZiTXgSVdQUbOR`NP]KP\KKWGIUEVdJQ_EDO>BMH3=G2(4='3<&?H21:$19#08"/7!/6 .6 .6.6/6#*") ' '& & ((/")!)07/5 &-3<                       )1DQ( $!%"%#&"&!%           ! $ "%"&#($)!'*0!(.#"#)/$$% ' '  %+)/%+,3%, !! 
" " # #!#!#!$!$!$"$!$"$"$"%"%"%"%"%"%"%"%"%"%"%"%"%"%#%#&#&#&#&#&$'$'#'#'"%)0#+ &&!((0;I+2&$%%&#*!("*$+%-lFg|Bdx?Q`5LZ3ER-AM)>J'J$FQ0,3+1*0*0*1+1,3-4.5 19#08"2:#3;$:B,5='6>(7@)8A*9B,:C-@J4H3?I4@K6AK7BL8BL:BL;CM=CN>EO@FQBHRDJUFMXHO[JQ^LS`NUbOVcPVdPVdPVdPUcOTaNR_MP\KR^MKWGJUEHTCGR@EP>ISACN;BM9AL7@K6?J4>H2=G1;E/:D.9B,8A+6?)5>(4<&3;%2:$08"/7!.5 ,4+2*0)/(.'-'-(.)/'/")!( ' '!("*#+5=#.6.5:D%                     " #!$ !%4I$=I$=H%7A!DN07A!+2*0(.'-&+%*#(#'$)5:(*0DN/H1?I3@K4AL6BM7CM9CN:DN;DO=DO>EO?FPAGQCHSDJUFLWGMYHNZINZINZIMYHKWGKVFITCHSBLWEFQ>ITAEO;DO:CN8BM6AL5@J3>H1=G0;E.:D,9B+7@)6>(4=&3;%19#/7!.5 =F+;C*:A+7=).4"',!%"&#($)%+',(.+3"*")!(")%.9G4<"$,IV+                 !"!$-55="8A#,6&/"*      !  #"%!%!&!&&+#%*#)  %'-)/!'!  ! !!!""#(/%'!)"*%.  !FW '+"&     !" " #!$!$!$!$"%"%"%"%"%"%"%#%#&#&#&#&#&#&#&#&#&#&#&#&$'$'$'%(2<+5%,)1*3&/$+")!(!'!'!(")#+%,+3jCg|?dxH0?I1AK3BM4CN6DO7EP9EQ:FQ;FQ=FR>GQ?FQ@T_OFPAQ_FQ_FFQAJTCGR@JUBKWCGR=FR!7@"1:%.%-     ! "%!$#' %"$%+   %#(',$*%+# &"#$*"()/+0+1&,%+%&$$%!)'+#         &/'0+4$,+2"&    ! # #!#"$"$"$"%"%#%"%#%#%#&#&#&#&#&#&#&#&#&#&$'$'$'$'$'%)&+>M*1/7;Iey:Q]7P[7P[6?K%=H$;F#9D"7A!5> 3;1:08/7/7.6.5-5-4,3+2*0-57=)&+$($($)%*',(.+2.6 19"3;$5>&7@'8B):D+H.@J0AL2DO4EQ5HT7gxNIV:JV;JVJV>JV>JV>JV>JV>JW=JWWgH.M'1%.      !"&#+'#"!!          "#)(-&+"!'*/!'#(&+'+!%##                   !)#+%.'/*3$,(/     "!# #!$"%"%"%"%#&#&#&#&#&#&#&$&$'$'$'$'$'$'$'$'$'%('+07!18"3;"f.&.$+$+$,&.(0+5oEh~=Wd%8A':C)I-Sd7Vf9Xi;Zk<]o?vJxKzM{N|O}O|OzNxMWXxMat@\n>]n?Wh9brERb5=H+:D)7A&5>$2;"07 -5,3,3+2*018#39%09?H-+2+2,3,3,3,3,4,4,4-4-5-5.52:!0819.6"*!(%,#)         !!!GW"AQ           !%.#+ '&##""#      !&,! #',$',"'$)$(# $"             !!"%!)            "*$,'0I%;F#8A"6?!3< 2;1:080807/7/7/7/7/7/6/6/6078@'/7/7/7/7/7/7/7/7/6.6.5.5/607193; 6?#:D&N]0Yk6M\2O]4Qa7Td9Yj$192:4=  )0"  # #!$!$!#                   6B8D6C!) '%%%$##"" !"'!%!%"" "            #' (-!',$)&*%)#'(-'-  !!!!"""#%&                           ! 
#!$#&"&#&$'$'$'%($($(%(%(%(%(%('+-3 /6 07-6Sb1q[jzNxHav5Zl1N^,IW*ER(AM&>I%;F$9C"7@!5>!3; 1:0908/7/7/7/6/6/6/67?&4<#3:"3;"5=$:A(2:!0808081919191:2:2:2: 2: 2: 2; 4"9C#DQ*JX.P`3j@}IdtKĵŵZ|GqHN\1JV0=G'5?!DL08@%6?#2; 2:2:19190908/7/7/6.6.6.5-5-5-5-4-4-4-4-4-4-4-5.50808192;6?!/7"!'!:D$)3(1"&!%!$"&"&"&                     I%!6?!6@"7@"8A#:D$CO(M\,Yg=¦Ĩ^q3ER(!4= 3< 2;1:190808/7/6.6.5-5-4,3+2*1*0)/(.(.(.(.)/*14= 8C"Sd,6@!!6?!8B"M\+N^,P`,Sc.GT)GT)T_BHV*ER(Xj0Sd-O_,M]+K[*6?!4= 3; 2:1908/7.6-41:08,3-2"04'"#), 4> '+-2#27&5:)5>!Xj/5=#;D&HV((.!'!(&.&-.5 /5!9C"@K'5@                                           $ "& $"%!% " "!%!$!!$  &-3/5 -4.5-4.5-3,3%,3*1&-&                                                   /:3>-6" & &")-7DT l4/5"-3 .4!.4-5}cmoqsu{br@Vg1dy6P\4P[7NZ5NY4Sd-=H$8B";D&:D$5?!,3+2*0(.&,$( $"&*'+$).5-3)/+2.507193; 4!7A"M]+i6s;N\1GT)IW*`t4y{~t 8A$3;05$05%-2$ $$'!!%$)&+'-(/)0,34= 5="4= %,%!)/7.6/6t<1<%                                  #+(1.7)2:D$8A# !% $!%"%"%!%"%"&"  *0+1*0$%%,3,2%+2&(/                                                          *4Pc$-5*1$+!($,#*/7 18"07".5 )2xb{d}g}hjlnp_n!3< @I.4<#9@(^o9[k97A!1:(.(.-4-58B!+2+2,3,3,4-4-4-5.5.618 082: '.53;"*2")&#$)                            ,5&/'0*46?!2;4ey8DP(AL&>I%;E$7A"5>!3< 2; 191908080807/7/7/707/7/7/72:!/6/6.6.5.5.5.5/6084<#3; :C':E$AM'GU)N]-kyVq`tcwfzh}knpyz{|Šzyqn}kzhwetH(FN2AI.;C)08/7.6.6.5.5.5.5.5-5-5-5-5-5-5-5.5.5.6.6/6/72: '.FT(GT).6 (%#"!                      BR KZ)6@!6?!5>"# # # # # ##+(/)0%,3-4,2,3-3/6                          $"))/,207     !(/%,'/+3.7)2(1'0                           -5&.!)%%% &!(#+7D!4=!3; 2: 19190808080707070707070708085<#081919I%;E$9C#7A"5>!3; 1908/7/7/6/6/6/6/7/7/707081808191919 3;!5=#3; 4`o@brBdtCfvEhyGTe0Te/MY1@L&@L&wEMX5FP1;D(9A'3:$5;&7>)8>*7<*48)$()-! ##(&,-418/608192: 2; 3"9C$s;O]2GT)_n>Yh<\k>^m@`pCcsEgxH|RUX^`a]^_doYW}R|RduE`pB]m@HV*FS(oBN^+4=!3; 2: 1908/7.53; 4= (-#'#BO&!'+,1!17%5;(8>*;A-;B,6=%8@% #*"*")!((/&-#)+1!#)     #'"%! # # #!                                                    
!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!!#!# # # # " " " # ""!!!            "$% '!((/")+2")")$,&-(/'/hxEbr?ap?lzLdpHR^9Ua:Ua:Ua:|IQ]7IS1FP/EO.}GM[09A':A(:A):B):B*;B*'8?(:A*;C+=D,?G.EO-")"*#*")!( '$+&!(Wl*CO)>I%;E$%)"&#&!$!                                                             !#!#!#!$!$!$!$!$!$!$!$!$!$! #!#!$!$!$!$!$!$!$!$!$!$!$!$"$"%!$!$!$!#!#!$"%!$ #""!! !" +16@ 5@Rf&"*!( ' '!(")#*#*"*"*")%,#+&-k{HfvAevAeuAeu@n~Ieu@du@l}H`o>R^8OZ6HT/q?IW)7A"3< /6,2*0)/'-%*!% !%!#!%(-',)/&8@(5<%4;$5<(6<*38).2%N[1 $ #!%$)&+',(.*0-51:6@!")#+9F6C,5%-19 /7/5 -3+1! #!$"                                                                      # # # # #!#!#!#!$!$!$!$!$!$!$!$!$!$!$!$!$"$"$"$"$"%"%"%"%!$"%"%"%"%"%"%"%"%"%#%#&#&#&"%"%#&"&"%"% %4= 6@"#%#)#*$+'.)1(19F9G:H(0&-#*&.'/fvAevAeuAeuArNeuAeuAevAoKez7BO'>I&;F$9C#5>!3< 2: 1918080807/7/7/7/6.6.6.5-4=E-08+2,3-4.6088@&>F+CL/Yg9GU)Ub6Uc7Vd7We7Xf8Yh9[j9^n;vEJnBSz}щĄ~oCyMUsC[k:Yh8Xf8Ve7GT)JT3CL0DM1BK03;".6-4.5ES'19;B,-4-4-4-4-4-5-5.5.5.6/6/70719")%-cw5FT)3;!07 %,.5%.*3(26?"                                                                           !!!"""" " # # #!#!#!$!$!$"$"$"%"%"%"%"%"%"%"%#%#%#&#&#&#&#&"%$'$'#'#&#&#'$'$'$'$(%)/9:C$2<4?(1#*,4/73\j=Wc;?E2=B1MV7OZ8Q\9S^:Rc/KX.?J&;E$7@"4=!3; 2: 19190808080808070707083;"080808181919 2: 2: 3; 4=!6?"H(CO(,3'.18"18"29"3:"2:!*2Pa*gxBfwBfwBevAeuAeuAdtAdt@cs@br@O^-BN'>I&=G%;F$:D#7@"2; .5,2*1*0)/).(.',',)/,3-4/607081919 2: 3; 3"Uf/GT)Uc8Uc7Vd7Vd7We8Xf8Yh9\k;vE~SSW[be{륜hdiYUpC^n;Zi9Xg8We8Vd7GT)Ug/4=!3; 2: 190807/7.6-5/7:A+)0&+%*&+&,','-(.)/,3084= 7@" (/&%!'$#!                                                                            !!!!!!!!!!!!!!!!!"""""""""""""" " " " " # # # # # # "!$"%"&#&$'$'$'$'$($($(%)%)%)',*/-2 .4!06!17"07!08 /7FQ,hyCgwBfwBfvBfvAfvAfvAfvAfvBfwB]n8R_7KV3BL,F->F->F.>E.9A'AN&-44;$/6082: 4=!7@"@K>)IW+Vc7Vd7We8Wf8Xg8\j;\k:JnBRSTZ[Ջ׍ُُݓߔZYSsEyKtDZi9Xf8We8KY+FS(=H%8A#4&8?'9@(:B)>F,@H.$,")")$+!'! %!                                                                                    
!!!!!!""""""""""" " " " " " # # # # # # # # # #!#!#!$!$!$!$!$ #"%#&$'$(%(%(%(&*(,*0,1-4 .5 /6*1+3Sc/qLqMrNsOrP]hCS^:Vb;Zg>^kB_n=R_6E->F-?F-?G.@G.@H/AI/AI/BJ0CK1DL1FO3Q`0GU)bpE[gB\hAR^6Ta7Vd8Zh9_om|LgvHdrF^kATa6Ud35="2: 19 19 19 19 19 19!29!29!2:"3:"3;#4;$5<$6=%6>&7?'9@(:A);B*F.?F.@H/AH/BJ0BJ0CK1DM2FN3GP3ez5Tb7MX3NZ4Q]5S`6Vc8Yh9dtBoFQRTYiijێݐޒݐێsiWS~Qm}IixHWe8Ta6MZ/oC9B&7@%7?%4<#6>$6>%/65<$3;#18"09/6:A+.4 17#)04:&6=(9?*;B,4= 6@!BM*,3'.'.##)"( %"                                                                                 # # # #!#!#!#!#!#!#!#!#!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!#!$!# # # # # #"" # # # #!$!$!$"%#&#&#'$'$($(%(%(%)$().+1,3-4-5$,.6P`-gxBfwBfvBfvBfvBfvBfvBfvBrDKW1>I%#;D&=H%KX.Vc7Vd7We8Xf8Yg9Zi:\k;uI~QSZiiky{|yj[WeuBUZi9Xg9We8Ub8LW4GQ3EN26>$08.5,3+1)//5#6;*27'37(.2#&+(-)0*0*1+2+2,34= (0)0'.$#)#)!&"                                                                                  # # # # # # # # #!#!#!#!#!#!$!$!$!$!$!$!$!$!$!$!$!$!$"$"$"%"%"%"%"%"%"%"%"%#%#&#&#&#&#&#&#&#&#&$'$'$'$'$'$'$'$($($(%(%(&*(-*0,2-3-4-5&--5Qa/qMrNsOtOtPqMuQ\o2AM'>I&";F$ER(KY+drFdrEhvHixJl{LmB|P~QSZijy{܊藳{kiUR|QcrB]k=Zh;DQ(;E$4=!2; 1908/6.5,3CN-4= +1-3 -2!',#'','-)/)/*0*07=(*1)0'.$$*"(!&$                                                                                     # # # # # # # # # """!! !!" 
" # # #!#!$!$!$!$!$!$"$"$"%"%"%"%"%"%"%"%"%#%#&#&#&#&#&#&#&#&#'$'$'$'$'$'$(%(%)%)%*',(-*0,2-3.4.5)0-5/8kyMhuLdpHWc;Wc;Ua;]n6Ub8Q\7LV3HS1CM-P]38A%8A&9A(:A):B);B*&19 6>$:B(5>!;D%Ue1IW*OY8MX3Q]5Tb7^l?fuFhxEnBVT[ijzۊߍzjZvGrLhwGftFdrFKY,^n;CM.FO2CL1@H/>F-:B*8?(HV(>G+AI/AI//6 07!29#4;%6=&7>'8?(:A*;C+*1")&-"(#)!#"                                                                              # # # # # # # # # # # # # # # # # # #!#!#!#!$!$!$!$!$!$!$!$!$"$"$"%"%"%"%"%"%#%#&#&#&#&#&#&#&#&#'$'$'$'$'$'$'$'$'$($(%(&*&+(-).+1.4!06"/5 /5+2.5-5\n5`lDVb;Vb;Wd<_n=\j#3; 2:3;"4<#5=$6=%6>%7?&8@'9@(4<#8@';B)E,?F-?G.@H.AI/BJ0BK0CL1DM2FO3GP3s>P^3M[.LW3P\5Ta7We8Yh9]l;k~AWS^zy{܋ӂoeM`q"3;!2: 2: 19 0807/7.6.6.6.5-5-5.6/607 18!&-&%$*% &#!"                                                                                  !""" " " " " " " " " # # # # " ""!  ! # # #!#!$!$!$!$!$!$!$!$!$"$"$"%"%"%"%"%"%"%"%#&#&#&#&#'$'"&$'$'$($(%(%)&+',*/,1.4!.4!/5!17"/6 -4.6&/]p4rNm}H_o<]k%7?%7?&8@&9@'9A(:B);C*F->F-?G-?G.@H.AI/AI/BJ0CK1DL1EM2GP3HR3KX-Q^4Xf9esFguGiwIkyKn}LnBabZijzzlgvJMZi9Yg9FS(l>`q:9B%3; 2: 2: 19 19 19 19 19 18 18 18 18 18 18!19!29!3:"'/#*#* '%&p87B4?0:                                                                               !!!!!!!!!!!!!!!   !!"""""""""""" "!    ! #!#!$!$!$!$!$"%"%"%"%#&#'#'#'$'$(&*G):C&8A&8@&9A'9A(9A(:B);B*.5.5E->F->F.?G.?G.@H.@H.AI/BK0CL0DM0DN/L[+GU)Xf8esGguGhvHixHk{GmBaaZty˾؆mZwKzMgvFdrCQ_3KY+FQ0@I,>F+MHY"!N],pBey9Q^5Yf;\j>Yg9\l;k|D[cZswԃۊ薪r`c{Pn~KixH`t3t>^n;6?"3< 5>#?G.5<$LV3IS2EO0?H,8@'4:(49(16'.2%/3',/%(+"! '$,$,#*!(!(%-%-*3*34= 5?"!&"!     
9D"9D"8B!7B 4?5?8B!8B!9C"9C"9D#:D#:D#:D#:D#:D#:E$9C"8C!9C":D#"9B%08$,%,pDT`9JV0NZ5LX3NY5IS2?H+=E+3;$06"28$4:'5:)49)49)27(04&-1##'$(%*&+'-(./5!,306 3:#/67?&8@&8@%8A%?H*AL+ER*Tc5guGhwHkzJl|KRJ{OrEa]dm{݋ߎ|`gwJmAZcsAYh9S`8HR1FO2EN2BJ0AI//7>E-=D-"08!(,3/5 -4,1(.%*$("&!$ $ #"!)0*1+2+2,3,3-4-4-4-3,3,3,2+2+1%%%%%%%%%+2,2,3,3$$$$$$$%%%%%%%%%%%%%,3,3,3,3,3                           "'&,(/)0$+")(0#+%.?L"F,@H/BJ0CK0DL1EM2FO2FO2IS2`q:HV*Q^3Xg9etFgvGhvGhwGixGiyEpCrFYZjiszzgpNj}@pKjyGgvEey:GU)HV)bs<[k9:C&:C'7@$3;!1908/7/7.6.5-5-4/618!3:#4;$5<%$,^r2ER'3;!/7/6+3,3*1).&+$)#'#&"&"%"%!$ # ""!!!!    !!!  # # # #"%'.!)%,%,#!'&,&-'-'.(.(.(.(/)/)/)/)/(.'-$*$ &'.+2,3-3-4-4-4.4.5,3)0.5/6 /6 /6/6/6/6 07 07!07                               2<,6-86@7B:F(0")%,$,&.=J#4<"1908/74<#5=$6=%6>%6>%29 6>%9A(;C)F-?G-@H.@H/AI/BJ0BK0CK1EN2EN2GP2HR2`qZi:crAhxDlAR\_iigZcyMtNm}KkzKRa2KX-ES(J{GZj7#190819190807/6.5-5-5-5-4,4-5$,AP BP$#+,4,4N],AM(>I&=H%"3; 080819 3:!4;"4<#4<#/63;"8@':B)F,>F,;C*>F,@H/AI/AJ/BJ0CK0DL1DM1DM1EN1EO1FP1L[*Ra/cx4Q_3_s3l{J}NOQnB{O~QXZYPPsFvJJl?Q_3JX*_n<\k:Yh8>G);D':C'9A&1:4"3; ,5)1)2,4+4-5,4$ $ $ $ $!$!$!$!%!%!%!%!%!%!% $#""# $""# # $ $ # #"!  !"""#"""" !"#!%"&.5-5-4,4+3*2)1(0'/*2$,$-$,$-&.&.)1*3+4.7/8091:2;                   +5*45="+2)0(/'.!(")*2/74=!8A%2: ,4$,MY3HS0AK*E->E-=E,=E,=D,I%>I%#( !""!     M\+GV&ET%6@7A!7A!6@!6@!6?!5>!5>!5=!4=!4< 3< 3; 2: 1908.6+3%,$##$ $ $ %!%!%!%!%!%!%!%!%!%!%!%!%!$ $#"""# $!%"%"%"%"!%!%!%!% $##         #"'',-6'/%-$,$,#+#+%-'/(0*1*2*2)1&.#+#+%-)1*2)1$,$,$-               " &# '&")'.$,0719!18!/7-5*2&-JU2GR0&-5-49@)8?(+2:A+:A,;A,:E"08=D,&>F,)/+1,2)006"07"+23:$29".518 5=$082: :B'5>!@I+JX*FS(We7Yg;Ub7Vd7Wf8Yh9[j:\k:iyGjzH\l:[j:kzJiwIZh9P]3KV4EO0EN1EM2@H-?G-?G.>F-=E-:A)"5>"5>"5>"5>"6?"7@#7@#6? 
8B" !# $ %!%!%"&"&"&"&"&"&"&"&!%"&!%!% #"""#!%#'#($($($($($($(#($($($($(   !!!!!#(#(%)%),3,3,2(.)0*1*1 ($+$+") $         !"/7(1(1,62;6@GX g/!(07!/6 07 18!18!08.6&-JU2JT3BO&IT02:"+2)0)/(.7=*6;*5:)+1*0&,&,%*&+&,'-@I.@L$*1+2,2.55<$5<$:A);C*:B(?H-2; BK/CL/EN/Zi9Xh5s?FT)Q_3Yg9nBKY+qB{KyIPSSS9B$i~G,=F+*,2).28$ #*$,'/+2+3.5/6/5 -3'-',$)#'"&"%"%!$"!$!# # " """!!   !!5>!7@#6@"7@"5> 4= 5>!5>!5>!5>!3< 2;3< 5>!5>"5>"5>"5>"6?"6?"7@"7A"9B#9C#  !## $ $ $ $ %!%!%!%"!%!% $ $##" "!%#(%)&*&*&*&*&*&*&+&+'+                 !!" # #!$!%"&#')39B$5>+5;E%=H'>H'06"06"07"/6!/6 07 07 .5/7+3%-IT/O\3NZ36=$6=%,3+3+2+2+2+2;B+;B,;B+;B+;C,E-F-?G.08@H.BJ/BJ/CK0CL0DM0DM0FO1FP1[k9]m:wD}EM[/Sa6Tb6P^2We8Yg9\k:k?cw8j~>i}=Wf7Uc5KY,P^2O]1GU(IyF^n:\k9K[)>G)4= 9B&9A&8@%7?%5=#2:!.619 18 -429"07 +2+26=%"*")'/'/&. ("*?J&9B#6?!.7  "&" $ # #!$!$!$ #"!!!       !!!!""'/193; '/191:.7,4/708/7.6-5.6/70819!##"     " $!%!%!%"&"&"&"&"&#'#'#'#'#'$($($($($)%)%)%)%*&*&*"'&*%*$)!&#!   !!!" #!#!$"%#&"%#&"%"&#'#'$'$($(%)&*',).).-2 .4!.5!.4 /6!07!/6 .5/7&.,4+3%-#*2:6>$7?%7?%7?&7?&7?&7?&7?&7?&8@'8?&9@'8@'8@'9A(:B);C*=E+>F-?G-@H.AI/AI/BJ/BK0CL0DL0DM1DM0EN1EO0FP0Tc2L[*]l;Wg4Xh5Zj7[k6]n7wC{FKI}IzHvEbs=ar=`p<_o<]m;]l:[j9Zi8\l;@J*=G)9B%:C&6>"2:5="4="4<"2:!18/7-5-5-4,4&"*&.%,"*#*$+ 'Xn'Xi1F*"8@%7?$082: 4<"19 .6-5/7/7-4!("*'/'/&-&")#*%&q9Ug.6A 7A!)36@!  !$!$!$!%"%"% #!%!%!%!$!%!% $"#!$!%!%!%"&"&!%#!! $"&!%#'#'#'$($($)$)$)#("& $"!    !!"(!'####$$#)$$%#*$!(,3%    !!"! " """"  !#!$"$"%"% #"%"%"%"%"&#&#&#&#&#&#'#'$'$'#'%(%)%)&*',(-).)/-3 .4!06"06!.5 /5.5/6/6(/.6-4$,$+%-4-4.619 4<#5=$5=$2:!08:B)F-.6F,=E,/6:B)8?'.6.5.5-5-5.5-5-5.5.5.54;$/6/7?G.@H/18?G.>F-08=E,07?G.:B)9A(>F-/7>F-=E,/7>E,;C*%=D,;C*-5&19!.507 *1*1*13:$4:%3:$-4,2-429".5/607192:3< 4= AJ-?H*CM.DN/EN/EO/DN/DM/5>!3< CL0AJ/?G.?G./7=E-;B*8?':A);B*;B+9@),3:B*,39@(9A(,44<#!)"*"*!)'."*%,&$*$")#%$,6)3/8     ,3-3-4+2*1*1*1(/#)(/(/&-&-%+! $!% #"%!%"" $ # # $!  #"$(   ! !   !! !!! #!!  
" # #" # #!#!#!#!#!$!$!$!$!$"$"%"%"%"%#%#&#&#&"%"% " #" ##&#'$'$'$'$'$($($($(&*&*'+(-).*0,2.4!/6!06"/5 .5.5/6-4/7/6/7&.-5$+")-58@'2:!=E,6=%,4,3-49@)4= '7?(9@*9@* '#*%,'/")$+$+$+ '$+$%"("#$!'1!                   .5/5 .5/5 ,3+2,2+2#$+2)0+1*1&-*0$*)/#"'  # # $#" # $ ##" # $ $!$!$ #"!! !!!!!!   " " "! " # # # # #!$!$!$!$!$!$!$!$!$"$"$!$ #"" #!$"%"%"%"%"%"%#&#&#&#&#&#&#&#&#&#&!%$'$'$($($($(%)&*&+',(.).+0-3 .4 /6!06"06!/6 /5/6.5/607 )10808*1!(9@(;B*;B*8?'5>!*1)/,3*0(.(.-3.4*1+2,33;#7?':A)/6;C)>F,19@H-AJ.BK/CK/CL/CM/DN/EN/KZ)Ve4Sb1Xg6Zj9Zj9\l:[j9Zi8Zi8Yi8Xh7Xg6Yh8Xg7CM,BL,?I*?H+>G*=F*@H-;D):B(:B(.6-5;B*:B*")$,#+"*'/ (&-&#*$+$$"( &""$#!                                 18!(/07 /6 .5,3$*1$&%%&%$##(/+2+2*1)/+2%%+   ! ""! !"""  """" " " " " # #!$!$!#!$!# #!#!#!$!$!$!# "! " #!#!$!$!$"$"$"%"%"%"%"%"%"%"%"%#%#&#&#&#&#&#&#&#&#&#&!$$'$'$($($($(%*&*&+(-(-).+0-3-3.4 .5 06!/6 /6 /6 /6 -407 /619 19 #+ 6>%29"*2.5*27@#3<5>!7?':B*F-=E+?G.@H.@H.AI.AJ/BJ/CK0DM1CL0CL/BK.BK.BK.BK.BK.AJ-AJ-AJ-AJ->G*4= 4= BK.BK.BK.BK-BK-AJ-?H*@I+>H)>H*>G*>G*=F);D(9B&4=!09082; 5=##+&.(0(/!)&.$+&$+ '$ '"(%""## "                                           9C"9C"9D"*5+2)0.5%,%%)/%%$%$$$+1-4$+2/5 ,3.5/6 /6 /6 /7 07 07 " " " " " #!$!#!$!$ # # # # # # # "!! " # #!#!$!$!$!$!$!$!$!$!$"$"$"%"%"%"%"%"%"%"%"%"%#&#&#&#&#&#&#&#&#&#&"$'$'$'$'$($(%)%)&*&*',(-).+1,2,2-3-4 .5 /5 06!.507 /6-519!29!2:!#*9@(,3,3:B);C+,4-4=D+F,>F,F+@I.@I.AI.@I.AJ/BK/AI.1919=F+=E+9A'4<#8@&/65<$5<%BJ.JU1IT1;G#IS1DL16=&;C+.6=D,/70809191:1:>G+F,?G-?G-F,(:A*-4-5=E,=E+L +3(0%,&-&-& '$* '$$&%####!"                                                               
;F#$")+2,43;"7>&8?&4<#9A'F,>F,>G->F,.6;C*-519!,4+2-36<'1:8@%1:09',/6.67=),1!%*%*&+*0:@+8?);B,,3:@-9?,7=*)/#HV*#* )1/6/67?&3:"@J)FU&'-'-3=6>#087=))0+1/6 2:".6/79A(7?%F->F-)17"+2,3-5.68@&7?%0;,6)3)2                                                                          # # # # # # # # # # # # # # # # # # # #!#!#!#!#!#!#!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$" #"%"%"%"%"%"%"%"%"%"%"%#&"&"%#&"&"&"&"&#&"&#'#'#'$($(%))39C$3=1;8B#/9+5#+ ''/*109;ILZ,"* &-5=E+?H-1;,6)2(2(2                                                                       # # # # # # # # # # # # # # # # # # # # # # #!#!#!#!#!#!#!$!$!$!$!$!$!$!$ # # !!$"%"%"%"%"%"%!$!$!$!$"%"%"%"%!$#&"%"%"%!%!%"%"&"&"%!%!%"%"%!%"&7@#'0(2(2'0'0&%")")")-5>K HV*2: "*!' &%+F+/8                                                                              # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # "!  "!$!$!$!$!$!$!$!$!$!$!$!$!$!$"%"%!"%"%!$!$ # #!$!$"%"%!$ # # # #""""!!! ""$"(%")!)*21:6>#6>$08)1%, ' &+3-508-57?&:B)F,?G-?G,>F,>F,=E+)+2,4-5.6/7?G,?H-$Qf$AO#2: ,4.5+2.4 .4 !(4= 5?!'1(2 (26?!3<6?!                                                                                 " " " # # # # # # # # # # # # # # # # # # # """! " #!$!$!$!$!$!$!   #!#!$!$!$!$" #"$!$!$    # #!$"$!$ #""" !   " !'#)#*'.*2082:!4<"/7$,#+"*")!' '>G,?G->G,?G,?G,?G-?G,?G,>F,))/(.(.7=)8>*9@*;B+-5.6/7EP.9C#9@+(/DN.:B*%!(s;HV*4="-5.6-4.4+1).$)',%*$)#("&!                                                                             " " " " " " " " " " " """""!! ! " # #!#!#!# #"     # # #!#!# # !#!$!#     " #!$!$ #""! ! !        ""$% '!)$,,4.6072:!-5/7.6#*"*$,!( '@H-@I.?G,?H->F,7?%4<#,4*15? '-(.7B /7.6.5-4+2&,%*#'# #-1&.2&-1$/4&04'15'"'%*6<):@,4>CP%191: '!)%-o7IW*7@#2:!/7,4,3+2)/&+%*$(#'"& $ ##"                                                                                 """""""!! !" # # # # # #!      " # # # #"" # #!      " " " #"!             
&/'0.7EV d},/607 2:"08 19!-53:!18#+-5#*"*%-!( ' :B(-5:A*:D"(.:E!09/7/7L\)4>'-%+5;)*0*0+21:2;7@#Qa,t( '")#*s;m5[o.GU(CP&4<"!)")-4,2*0'-%*$(#'"&"% $!$ #"!                                                                                  !!" " " # # # #        " " " "" " # "      "!"!"!     !        !  &/(2,66@Rd+"*/6 07 /6 .518!18 19 2:!.62:!$,08#+&-")")%,"(.5/7/8/79D!(.'-1:29$*1+2-4,4-4074<$=D-4<$6=%7>'9@(7>&>E-8?'-5.5!("*#+s;i1m5[o.CQ#CQ%7E1:08 (&-4.4 )/&+%*"&"&"%""% $ #!$!$"!                                                                                  !""" " " " ""        """""! """      !!!!!                  4=!6?"*36@ ;E$>I&'.+2/6 /6 /7 07 18 18 29!-42:!+33;!)108$,'/#+$+")#*%+@K);B+9@*+2,3-4-4-418!,3;B+-418+2(.&+(,-1#16'!(")#+$,tL DR&AN#?M!1:/7*2-5%--4'.+2'.&.)0+1&*$("&"&"!%!%!%!$!$ #" ""                                                                             "!  !"!      !!! !                    "!" # $"&#'%))27@"7A#8B$4> /5!-4/5 /5 ,3/6 &/6 -407 /608 .50719 2:!29 .63;!4<"4="+4.77?$8A%FS(FT(FT(GU)IV*HV)GV(IW)FT&LZ,LZ,KY+IX)KY+IX*JX+IW*HV)GU(BP$ES';I#6?$8@$ER)FS)FS)FS)FT)GT)GT)GT)GU)ER'GT)GT)FT(ER'FS(@M"DR';HCP%3< 1:08+3.6-5+3*1+3 (+2%,#,DQ(!4=!3; 2:19/7-5.6.6-4#++2+3 '*1 (!(DR$GX"=H%:D$(25?!3<#(!% $ #!$ #! #  ""!!!!                                                                   !          -8,7-79C# '")!($+#$%#                      "" #!$!%"%"%"&"&#'#'$(%*&+',)/*0+1,2-3 -4 .4 .5 /5 /5 ,2/5 /6 &&&/6-4.507 07 !)!)0819 19 07070819 3;!3;!3<"4<"4<"4<"5="4="5="6>#5="36?!'0"& $#""!$ #  "!!                                                                     .9-8-8,7,6,6 ' '%$$*1##"*0              !"!#"%!$"%"%"%"%!$#&#'#'$($(%)&+',(-*/+1,1,2-3-3 .4 .4 .5 /5 -4+2.5/5 /6 & ' '/6/7-4-4/607 08 !(!)!)&.0819 19 29 2: 2: 2: 2: 29 29 19 19 /7/70808/7/6.5'/*2,4,4*1 ((0)1 (&."*`t3Xj0Uf/6@!         !!""""### # $!$!$ # # $ # # # $!$&-(/+2)0,3+1.5+2-4/6 +5+6;F$,7                                        " ,2&   !!"     
!$!$"%"%"%!$ # # $"%"%"&"&"&"&"&"&"&"&#&#'#'$($($($)%)%*&+&+',(.).*/*0,2-3 -3 .4 .4 -3+2*0'.&-#*(/&, '*1*1,3+1-4-4,3-4-4,3,3'. '")&#*)1+2,3*2'.%-'/"*HV)GU(>MTf/Wh0Vh0Wh0Vg/Ug/>J$6@!   !!"""### # $!$"&"&!$ # # $!% #!%!$#'.(/$+#+2,3$-4,3/607 +59D":D",7                                "    +1"(!($% &  !!     "!#!$!$ #!! #!$"%"%!$!$ #!!$"&"&"&"&"&"&"&"&#&#'!%$($($($($)$(&*&+&+',(-).)/,1,2-4.7.50909&/'.&-')1)0(/#*&-)0'.'."*,3*1+2'.)0'.&. ('.'/!)$+AN&BO'DQ(CP'DQ(CO'Qb,BN&Te.N_(@M%>I$=H$;F#;F#:E#:D#9C"8C"8B"7A!6@ 7A!7A!4>/9,6(1      ""## # # # $ $ $!%#&"&!$ $ $ #!%!$"% #!$%,&,+1(/*1+2## $-3/68B!8C"9C",7;E# 6?!6?"6@"7@"7@"7@"7A#7A"7@"4>7@",56@!3<4=8B#9C#:D%;E%2<8B#9C$9C$:D$:D$:D$:D%;E%;E$!6?!'05?!6?!5>!5?!'1'15?!5?!6?!8A#6?!6?!3=1:3=6@!1:3=7A"4>2<3<7A"9C$9C$9C$9C$9C#9C$:D$8B":D$9C#)3)38B"8B"8B"7A!7A!6@ 7A!)25>3=/92<6@!'1    !"" #!$"&"&"%!$ $ $ $ $ $!$!%#!$ $!$"!$!%  #('-*0$+*0)0%%$*1,3-4+37A 9C"+6:E",77B                    "       !%*0$%&$!(!(,6,7-7 !!     " """ # # # # # #!#!#!#!$!$!$!$!$!$!$!$"%"%"%"% #""%"%"%"%"%"%!%!%"%"%"%"&!%"&"&"&"&"&"&"&"&"&!%#'#'#'#(#(#'#(#($)$)$)%)%*%*5>!5>!'07A#4= 3<3<5>!8B$7A#6?!'0'1'1'1'1(1(1(1(18A#8A"8A#5? 6@!8B#(2(2)2(28A"8A"6@ 7A"0:7@!(27A"4>.81:7A"(21;             "! # # #!$"&"%!% $ $ $ $ $ $ $!$ # #!$ $ !$!$ # $!'%+" *0*0$%# $,3,3,4+3:D#+5+6:E",79D!                      !""%$*0,2,2& ',6:D#;E# !!     " "!! # # # # # # # # # # #!#!#!$!$!$!$!$!$!$!%"%"%!%! $"%"%"%"%!%!%!%!%!%!%!%"%"% $"%"&"&"%"%"%"%"%"%"%"&"% $"&"&"&"&"&"&"&"&"&"&"&"&"'"'"&$($($($)$)'0'0'0'05?!5> 4= 6?!5> 5> 6@"6@!'1(1(16?!6?!8B#7A"6?!5? 7A!(2(26@!  2;4=(1(13<               #!$"!$ $!$"%!$!$ $ $ $ $ $ $ $ $ $" $ "## !$ #)#)*1*0+1# # $+2,3,3,3(/;F$+5+6:E"9C!:E"              !#"#+1*0*0$ '&+5,6,70;  !!     ""!!" # # # # # # # "" # # # # #!#!$!$!$!$!$!$!$!$"%!$ #!!$"%"%"%!%!$!$!$!$!$!$!$!%!%!% #!%"%!%!%!%!%!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$!$ $ $# $!%!%!%!%"&"&"&"&"&#'#(#(#(4=!4= '0'0'0'0'0(2(2(25? 6@!5?!5?!  6@!6@!4>                   !!"  #!$"!$ $!$!$!$!$ $ $ $ $!$ ## $ $"# ! 
$ $"# %*0+1&-*0$$*1$,2,3+2+2)0+6;F$+6:E"6A6A        "#*0*0+1"(,3")!),6:D#;F#4?       "     ""! " " " " " # "     # # # # #!#!$!$!$!$!$!$"$!$!$! #"%"%!%!$!$!$ $ !$!$!$!$!$!%!$ #!%!%!$!$!$ $ $ # $ $ #  # # # # # # # # # ## # # # # $ $ $" $!%!%!%!%!%!%"&"'"&3< 5>!4= 5> 4> '05>!'0'0!2<!                 *1%,"  $!$!%"!$ #!$#!$ $ $ $ $ $ $#"" $ #! $! ! $ !"(&," +2*1+2$%$+2,3$(/*1 ( !$                ,3.5*1)0(/!%!$!$!%!!$ !$" $ $## $ $ ##   $!#" # #"  %*1+2,2*1,3#$%$+2+2,3!()0'.$,;F#,7:D"   #*#*,7-83>2=           "     ""!!""""""            " # # # # #!#!$!$!$!$!$!"!$!$!$!$!$ $      # # # #!$!$!$ #!$!$ # # #"""!  !!!        !!!"""""""""""" #"""#"#######"" #                  *1# ,3-4$(/$+"!$!%" $ $" $#"# $ $ $     #! !"" # "&,*1-3*1+1*1$,3,3$*1*1%(/%&-&-$+")!(5@;F$8C!         "     !!! !"""""              ! # # # # #!#!$!$!$!$"! #!$!$!$!$ #        # # # #!$!$!$!$ # #"""        !!!""""""""!!""!"""! ""!!                   +1+1-3#-3+2$'.&-"!$!$# #" $""# # $!$    " #! #!  "# ,3.4-4-3-4-4$,3+2$*1'.$,3&-$!           "     !!!"""""                " # # # # # #!#!# #!  #!$!$!$ #         "" # #!# #!# # #"!           !!!!" """""!!!!!!!!                           ,2# .4.4$+2$&-(/'-  # $ #  "#    !$!$ $!$  "" $"" """ #  ")'..4.5$-4$$+2$$*0")        !     !!!!"""""                ! # # # # # # # # #!! #!#!$ # #      "" " # # # #"!       !!!""""!!!!!                              -4,2# .5-4+2# *1*1&,% $"" #! ! #    !$!$ $ $!$""" #! #   " !(/.5+2.5.5/6,2,3+2   ! ! "      !!  !!""""                  " " " # # # # # ""! # # # #!"" " " ""!!     !!!!!!!!!  !                              .6.5/6 .5.5+1+2*1*0(/(. !%!$   #! #! #    !$ $ $ $!$ #"  $"# "#" !"   /6-4.6.5/6 ! !"      !! !!!"""!                  " " " " " # # "!" # # # "  !""!""!   /6       !!!!                                  .5+2,2#$)/*0*0)/ " #!""""   !%!% $ $ $!%!%!%!% # !$# !!  !!!!!!!!          !"""""""!  !""" !!         .8,6&$$+1*0*0*0""*0 & &"(+2!(-4-3,2$*0*1*1#))0"%"% #!$ # ## # #!"!!!!                  
+2.4-3# " ,2                    M##! !         !!      " "*1!'&!(,7  !!""####&0"""              "" #(/)0)0)0)0*0&$$+1+1$$+2+2,3*1-4/6+6             ;G#,7:D"8B!9C"#*%,$$+2%)/*0+2*1)/" !#!$ $!%"&"&!!!!!!!!! !!!!!!!!   !!!        ;I2=)0""!              ! $"##&+6:E#          !  !""##"&/*2)2&/ "  ! "           "#(/(/(/(/)/)0)0*1*1+2+2#$*1*1+1*0,3+1$.59D"              ;G"8C :E"9C"+5:D#,3,3(/$%#,2+1-3'-"  ""#!!!!!!!!!!!!!!!!  !!!        ?N-8,6)0$!            !   ! "*0*1$%+6                  !""#"(1(0,4-6#+)2"""!        !!  " $(/(/'.&-%,$+$+%,&-(/)0*1+1#*1*1*1*1+1)0+1$+26@M;F#.9+5                  ! )/*0$$*5,6             !!"##'0-5+3-5*2.6*2"""!     !!!   # $'.%,!(!(%,'.(/(/(/(.'-&-(/*1+1+1*1*0*1*1+2)0$,2*18B!HW(            6A9D!9D!9D"+5)1,3+2+2$%$-4+2.5.4-4  ! """   !!!!!! !"       !CR!;J.9,75?*4%   !              $"*0$%,6,6       !!""#)2+3-5+3+3.6"*#,"""!         !!!!!  !$")!(&-'.(/)0)0)0*0*0*0)0(/'.(.*1+2*1*1*0*1*1+2%$,3.59C"            7B9D!:E"+6;E#9C"(/#*%*1$+1$,3+2.5.5/6  !!"   !!!!! ! #     DT#;J0;:E#+6+5!(+2                  #"$+1%8C"            !"##+3-5$,.6.6'/.6"*,5""!       !!!!!!  !$%%,'.(/(/)0%,+2##+2+2*1*1*1)0'.(/*1+1*1*0*0*1$%$$$9C";F#           :E":E"7B,6;F$)1+2*1+2$$,3$,3$/6 /6 /6 !   !!!  !!      =LAP :I;E#,69C"!(+2+1     !     !      (. &)/+1%                      !!""#(0+3%-/7/708,4.6)2"""!!        !!!!!"" !%"&-'.(/(/*1$$+2##+1+1+1##+1*1)/*0)0*0*1*0*0$$$%%(/,37B :I            8C7B:E"+62=:D")1&%'.$,3$+2-4.5/6+2 !!! !!   !GV%GU&.9-7:E#+6& '&%                 %+)0)/       !!!!!!!!!!!            !!"#(1!)!)/7*3,4/7.6-5)1#""!      !!!!""  "%&.'.(/(/(/)/)0(/*0##*1*1*1####+1*1*0)0)0*0*1*0$%%%$$$*2:D"           0;,78C :D"IX)     !!!!!"""""" " " # # # " """"""!!!!!!!   ""#!&-&./71:4LET#       !!!!  !!!!!!!""" " " # # " """"!!!!!!!!     ###$',4/83< 5>"5=!5>"4=!.6#+%# #"% # # #""-4,2" ,2(/,2 +1+2!$!%!%"%)0#*#*#)"(%" " '%,(.)0*0*1" *0*1+1+1*0(.$+'-*1,2*1*1*1*1*1+1,2,3+2*0,2-4$$$$$$,3+2$+1,3:E"3=       " !!":E#;E#,7-7-8(0)1'.,3,3!($#") '   "  !         ! 
!$"&$ '+59D";E#@O KZ)        !!      !!!!!!""""" " " " """""!!!!!!!!    !! ###!')0-52;5>"4=!4=!5>"2;+3")  #!% $ # ##*1-3*1" ,2" -4)/# ,3!%!%!%"%&,&-&-'-&,%+"(#""('-)/*0*1" *0*1+1+1*1(/%+'.+1,3+1*1*1*1+1+1,3,3+2*1,3-4$$$$$$,3*1$$$'.3>      !!;E#;E#,7-7-7"5=!5>!5>!5>"08'/"!# #!% # $ # # #+2,3# -3+2,2+1-3-3*1$+!%"%"% &,&-'-'.'.'.'-%,!("%&,)/*1+1*1*0*1+2+2+1(/%+(/+2,3+1*1*1+1+1+2-3-3+2*1-3-4$$$$$$,3+2,3$(/#*,6    ",7,7:E#,7+3+2&-%"),2!(+2$#*%   "             %*0+1$%+5+6:E#3>JY)GW%              !!!!!!!!!!!""" """"""!!!!!!  !     " # $##!')1085>"5>!6?!6?!6?"5=!-5##! $ $!$ #!$ # #+2,3# -4-4-3-3-4-4.5)0!$"%"%!#)%+&,'-(.(.(.(.'-$*$$%+)/*1+1*1*1*1+2+2+1(.%,)0,3-3+1+1+1+1+2+2-4-4+2+2-4.4$$$$$$,3,3,3$'.'. '   ;E#!6@!6@!6?!5> 2;(0"#  #!% #!$ #!!$*0,3+1.5# -4.5+2-4+2!%!%" !'%+'-(.(.(/(/(.%, &$%+)0+1+2+1*1+1,2,2+1'.&-*1,3-4+1+1+1+1+2-3-4-4+2,3.4.5,3$$$$$,3,3,3$)0+2$+ -4*1*1,3#*,3$,3 '$+%     !             &*1$+1+2#+,6,74?CR"HW&                !!!!!!!!!!!!!!!"""""""!!!!!  !!       #!% ##" &&.2:6?"6@!7A"7A"6@!5?!/7#"## $!% #!$ #!% ## +2*1-4# .5.5+5.5# +1!"(%"#)&,(.(/)/)/(.&, &$%,*0+1+2+1+1,2,2,2*1&-(.,2-4#+1+1+1+2+2-4-4-4,2-4.5.5,3,2+2%%$,3-3+2$$%,,3$+%'.%,%$%,%    ""               &*1$+1$%,0:2=-8BQ!HW'               !!!!!!!!!!!!!!!!!""!""!!!!!!!!             !"!$!$#!#%,/75>!6@!8B"9C"8B"5? 4= &, ## $!$ $!$ $ !$# +2*1.4# /6+2/6.5)0 ! #)!' !'&,(.)/)/)/(/&, &%'-*1+2,2+1+2,3,3,2*0&-*0-3-4#+2+2+2+2-4.4.4-4,3.5.5.5,3,2,2,2,3$,3-3+2+2,3&,3$+#*$+                  &#*1+1 '#*.8,76A;JJY)=L                !!!!!!!!!!!!!!!!!!!!!!!!!!!!  !              !" #"& ## &-54=!6@"8C"9D":D"7A 7A!2;" #! # # # $!%!%&-# ,3# +2.5/6.5.5+2& !#)$)"(#%&,(.)/)0)0)/&,%!'(/+2,2,2+2,3,3,3,2(/'.,3-4-4$+2+2+2,3.4.5.5-4-4/5.5.5,3,3,3,3,3,3,3-4-3 '%,%-                   &#*1$$%+6:E#-89D JY)DS#!                                                               
"#"% $##(/2;6?!9C";F";G#9D :D"6@!!"!# $ $ $ $!$#!%*0+2# +2+2/6/6/6*1#   #)$*#)#%&,(/)0*0*0)/%+$$**1,2,3,3,3,3-3,3+2'-*1-4.4)0$$$$.4.5.5.5-4.5/6.5.5,3,3,3,3,3-4$+      #              '#+1 &$ '#+,7:D"8CGU&0;7CJY)                 !     !!"           "!             ! # #"% #!#"+35?!9D"JZ&GW#"( =H# # $# # $ $!$ #!%(/*0# ,3$ .5/6-4$ '"    !#)$*#)""((.*0*0*0*0(. &#)*1,3-3-3-3-3-4-4,2'.+2.5.5.5$$$$.5.5/6 /5.5/6 /6 /6.5-4-3,3              !  '#+2#+1,3&-4?3>-8:E"IX(JY)                 !!       !!                   !" ##!% $##  &4= :D"=I#       ## $ $ #! $!$!*0)0" # +2-4.5.5*0*0#!  $*%+#( %+)/*0*1*1*0&,$(.,2-3-3-3-4-4-4-3)0*1.5.5.5.5$$$.5.5/6 /6 .5/6 /6 /6.5.5-4                '$"($,2%'.3=,7;F#9D!;G"JY)              !                                       !"  # $!% $#"!1:9C"-4       "  $! ##!$ $!%*0*0$ *1+2'..5+2# )/!      %*%+!'$'-*0+1+1+1)/!(#*+2-3-4-4-4-4-4-4+2(/.5/5 .5.5,3$$.5.5/6 /6 /6/6 07 /6 /6.5           #     !'$$")+2,3%,/:,7-77B;F#9E           !    %*'-$*','-'-'-'-'-'-'-%+%+                                      !" #!$ #!% #!#!         # $ $ $"""" $+)0# # ,3+2-4*1*1" # ##(%+%+%#))/*1+1+1*1&-%*0-3-4-4-4-4.4.4-3(/-4/6 /5.5.5-4-4.5.5/6 /6 /6 /6 07 /6 /6          #   "  '*0%#*+2$+%!),76@;F#;F":E!IX(!           ','-'-!&'-',!&"!!!!'&,'-'-#)"('-(-'-                                        !" #" #!% ### !       "# $" "! # $!$*0)0" *0,3-4*1,3# (/#!  "$*&+$* '-*0+1+2+1)/ &'.-3-4-4-4.4.4.5-4)/-4/6 /6 .5.5.5.5.5.5/6 07 /6 /6 07 07             "  $#*$$+2%%,%,;E#8C ;F#3>"%.2;+5JZ'M\* !!!!!!!!!!     # $ # $ # $ $ $#!         ##  ! ## # $%+'.)/# *0*0$ ,3)/$+#)##"! &,'-"((/,2,3-3,2&,*0.5.5 .5.5.5/6 /6 +1.507 /6 /6/6/6/6/6/6 07       %,%%$,3&-)0*2*1:E#;E#:E"-8-8!       #)(-                          "( !$* (&,'.                        09/75>"5>"+4095>"%.2;-7M]*HX&K[) !!!!!!!!!! ,3 $ $  # ## ###        $" $ #! #)0)/# )/# *1# # )0!(##"   '-%,&,,2-3-3-3(.)0.5 .5 .5.5/5 /6 /6 *0/6 07 /6 /6/6/6/6/607      %,%%!(,3&,3)0*1,7;E#:E#:E"-8!       (-                             ! 
% %+"5>"&/3<5>"%/1;,6M\*JZ(HX&IX' !!!!!!""!! +1,2 #!$!$!$ $!$ ! $#         $"# !!#! #" $&,'.*0# )/# +2(/)/$+#)"##!   '-%+2-3-3-3)0)0/5 /5 .5.5/5 /6 /6 *107 07 /6 /6/6/6/6/7   "  #*&%#*,3&-,3&+3(0:E#,7:E";F$       %*                               "( $*"6>"6>"$-4=!5>!%/1;*4@PKZ)GV%ET#!!  !""""! " " ,2-4!%!$ $ $ $ $ $!#           $! $#! !!)/*1# (/# +2)0# *0"!"#"!   $*+2-3-4-4*1*1/6 /5 /5 /5/6 /6 .5-407 07 /6/6/6/6/6   &&%%,,3(/'.*1+3,3,7:E#,7;F#"5>"5>!5>"3;.76?"6?"5>"$-5>!4=!%/1;'1*4KZ)IX'ET$BQ""!!!!"""""""! *0" *1,3!(!$# $"# $$"        $ # !!"!## $(.*0)/" )/# (/+2)/$+# !"""   &-+1-3-4-4+1,2/6 /6 /5 /6 /6 /6 -4/607 07 /6/6/6/6 ")$+%&-*1,3(/%*2,3+3,7,7,6:E#!                                         !' :E"$+&,              5>!6?"5>"5>"6>".72;6?#6?"3< %.5>!4= %.1:'0'1)4HW'ET%AP"""""""""""""! " " " # ,3!%!%!$!$ $ " $ $           !%  # #!"!#%,*1)0+2# *1# # (. !"""   %-4.4.4+2-4/6 /6 /6 /6 /6 /6 )007 07 /7 /6/6/7 '.&--4'.'.,3$+%(/,4,4;F$:E#,7,7!  ! (.                                    "':E!=H$'.              4= 6?"6?"5>"6>"6>",54=!6?#6?#09&/5>!3<%.1:&0+5+5+6DS$5@"""""""""""#!"+2,2,3# # ,3!$!% $!$ $!" $# "       $ $ !!!"# ##))0)0# )/# # # )/*1'.  !""!   -4.4.4+1.5/6 /6 /6 /6 /6 /6 .507 07 /7/7/7'.,3(.&-,3'.%+2,3-4-5,7:D#,7      (.                                     !&=I$%+            3<6?"6?"6?"5>"6?"4=!,56?"6?"6?".7'05>!2;%.09&/,6(2*5*53>!"""""""""" #" #! "*0+1,3+2*1,3'. # $ $  $ $ $##    !%  !%!%"%!$   $"!"!(.(." )0(/$ # # *0+1  !""!   .4*1/6 /6 /6 /6 /6 /6 ,307 07 07 /7/7(/%,,3*0*1-4(/-4,3-5;E#:D#,7    !                                        !7@#6?"6?"6>"6?#1:.76?"6?"6>"+4(14= 2;%./8%.+5(2)3.9.93="""""""""# ## #"!" )0*1+2,3# ,3+1"%!%!$ $! #" $!  "%"%!%  !$!%!%!%""!  #)*1*1# (/# # # +2" *0  !""!   (//6 /6 /6 /6 /6 /6 -407 07 07/7,3,3)0,3+2,3.5*2:E#.5,7"!   "                                           7B#                             %-4=!5>!  5>"4= 4= 6?"7@#6?"6?"6?"6?#.7/86?"6>"5>")2(14= 2;%-.7(1)2'0,6+5*44>"""###### # $ # $#"!,2*1-3-4.4# ,3+1 $!%# $ # $ ! 
$ $ !"%"%"%!%  #!%!%!%#!!$ $"#!)0+1" *1)/# $ # *0+2%+"  !""!     /6 /6 /6 /7 -507 07 0707%,-3-4)0-5.5*2/607 "!!     !                                          ;F"(.                       ! 4=!/85>"3<4= 5>!5>!4= 4= 6?"7@#6?"6?"6>"6?"5>",51:6>"5>"5>!(1(13<2;$-,5*3&/(2*4)3.8/9"######## #!$ $!$ #"",3-3-4,3# # +2$*!$ "# ## ## $!%"%"%!%  #!%!%!% # #!%!  (/(/+1(.)0$ $ *1# $!   !!!!   #'/6 /7 .507 07 07-3,3,3.5.5/6/6 "!!!   !'-                                          "6?"3<4= 4= 4= 5>!7@#7@#6?"6?"6>"6?"4=!+42;5>"5>"4=!(0'02;2:$-*3,5&/*4'1(1/9#"####### # $!%!%!$ #"!!# -4,2.5-4# +1# !$!% $ # $ $ ##!$"%"%!%  #!$!%!% $ "!$!%"#)0*0)0*1*1# $ # *0%+*0!    !!!!     07 07 07(/.5.5/6/6/7 "!! ! !!)/                                          "6?#5>!4= 4= 4= 6?"7@#6?"6?"5>"5>"6?"2;+42;5>!5=!4< '0'/2:1:$-'0-6%.+4'1)3-7 "####### #!$"%"%!$ ## #",3# -4-4# +2# !%!%!$ $!" # $ $!"!$!%"%!% ## #!%!%!$!! $ "#%+" *0*1# $ *0+2,2"    !!!      .5/6/6/6 "!!     !)/                                           9D!         !K['5>"5>"6?"6?"5>!5>!5>!6@"7@#6?"6?"5>"5>"5>"09+43;4=!4=!3< '0&/191:%.$--6%-*3&/+5)3!####### # $!%"%"%!% $ # #!$ #*1+2+2,3+1,3*1%,!$" # # ! $"!" $!%!%!% ## #!$!%!$"#"#!(.+2*0(/*0+1$ # *1" *0        !!    /6/6 "!!!  !!(-                                           %         .5    !!!!  M]*6?"6>"6?"7@#6?"5?!6?"6@"6?"6?"5>"5>!5>"5>!/7+42;4=!4=!3; '0%-081:(0$--5$-(1%/+4(2"####### # $!%"&"&!% $ # #!#.4# .5+1*1*1'. #!!! # # $"!" #!%!%!% ###!$!$!$#"!(.*0# *1+2# $ +2,3,2*0!            #!!! ! !!).                                            (. !      .5.5$$+2+2,3.4,2-4-4-4+2+1$$+1+1+1# !!! !L\)6?"6?"6?"7@#6?"6?"6?"6?"6?"6>"5>!5>!5>!4=!-6+42;4!5>!5=!5=!3< ,5+42:3< 3< 2:)1#,-609,4#,)2)1%.(1'0######## # $!$"&"&"&!% $ # #!$!$!$+2.5%,3*1+2*0 #!$# $!" !$#!!#!$!%!% ##"# !$!$ $!!*0,2# ,2*1+2$ # *0-3+1        !  !!!(.                                             %+ /6 .5.5/6 )0.5,3$+2-4*1,2#$## $$# # # # *0# *0*1+1+1*1*1*1# # *1# # ""!! 
KZ(6>"6?"5>"6?"6?"6?"6?"6?"5>"5>!5=!4=!4=!3;+4+41:3; 3; 2:)2#++4/8-6#+&/*3$-)2#"###### # $ $!%"&"&"&!% $ #" $"!$-3+2+2# )0*0*0&- #! #"!$!$#!!" $!$!% ##"# !$!$ $""""&,,3,2,2*0+2# $ )0*1-3,3!            !!&,                                               ! /6 /6 /6 /6 /6-4.5$,3$-4*0,3$$# +2$## # # )/+1*0*0*0*0*0*0*0*0*0*0# # *1"('.(.*1 # #"""!   IY(5>"5>"5>!6?"6?"5>!5>!5>!5>!5=!4=!4=!4=!2:+3+3192;2;1:+3#+(1/7.6"+#,*3#,(1#"###### # $ $!%"&"&"&!% $ # $ # #"%,3.4-4# +2# *0 #!!$!$ #! " #!$!% ##""  !$!$# !!+1,3# +2*1*1$ # *1# -4          !!!*0                                                /6 /6 /6 /6/6-4.5$$-4)0,3*1+1*0*0## # *1+1*0*0*0'.*1*0! +1'.# $!% $"!%!%"%#!%"%"&"&"%!$ $ # #""  !HW&5>"5>"5>!5>"5>!5>!5>!5>!5=!4=!4=!4!5>!5>!5>!5>!5=!5=!4=!4=!4< 3< 3< 19*2)2/81:1:09-5"*"++3-5*2"*$,'0#"###### # $ $!$!%"&"%"%!% $! #!$!$!$*1,3,3# *1(/ !$! #!$!$!$ #! !# $!$ $#"   !$!$ $" "!,2,3*1*1*1*1$ $ +1*1,3(/        #)                                                !    /6 /6 /6 /6/6.5)0,3,3+2,2*1+1+1,2+2*1#,3,3*1-3-3-3 # # $! #   $!$!$" $#"!%!$!$!$ # #"""ES$4=!4=!5=!5>!4=!4=!4=!4=!4=!4< 3< 3< 3; 09)2)1/7191908.6#+"*'0,4+3&.!)#!###### # # $ $!$!%"&"%"%!% $  #!$" $!%# *1+2# +1*1" "!$!$!$ #! !" $!$!$#"   #!$ $#  -4(/# +1+1+2$ # +1+2-4                                                   !  /6 /6 /6/6/6,3-4-4+2,3)0+2+1+1*0+1#-3,3+1-4.5.5# #"#! #!  #       " ##  #!$ $!$!$!%!$!$!$ # #""! "FT%4=!4=!4=!4=!4= 4= 4= 4= 4< 3< 3< 3; 2;08)1(0.6090908.6'/!)"*)1*2(0%-######## # $ $ $!$!%"%!%"%!%!$ !$!!$!%# ,3# )/" "  #!$!$!$ #"  " #!$!$##   #!$ $ #!!" ,3-3+1+2*1+2# $ +2*1-4.5                                                   /6 /6 /6 /6/6-4&-+2,2+2,3+1+1+1+2+2$$+2-4$.5/6 "## #"   $"!" $ $    $ # #!" #"!" ## !$ #!$!$!$"!$!$ ##"" "-84=!4=!4=!4=!4< 4< 4< 4< 3< 3< 3; 2; 2:/8)1'/-50808/7.6*2!)!)"*&.&-$"###### $ $ $ $ $!$!%"%!%"%!%!$!$#!$")0,3# )/)0*0 $ $!$!$!$ #"  " # $!$ #   ## $ $ #"" -4)0+1-4+1*1$ $ ,2-4/5                                              )2!  ! 
/6 /6 /6/6/6-4*1-4+2,3*1+2+1+2+2+2$-4.4.5.5.6"" ###"#"#!$ # !%!%# #!%!%!% !%!%!$ $#! $""" #! #!! $ !$!$"!$!$" # #""! !*43< 4< 4= 4= 3< 3< 3< 3< 3< 3; 2; 2;1:/8)1&.+4/7/7.7-5+3'/!)'!)"######## $ $ $ $ $!$!%!%!$!%!%!$ $ $!$ $!%# # # $ )0*0$+ $!$!$!$!$ #"  !# $!$ #   ## $ $#"-3-4,3-3*1,3+2$ +2+2-4,2                                              !   ! /6 /6 /6/6.5)0-4-4+2,3(/+2+1,3,3$$.5.5/607  "!"#" ## $ $!% "&!%#'#'#($( $#($($($(#(#'#'#'"&"&"&#"!%# ## #"" #!"! $!!$! $!$!$"!$ # #""!  !'03< 3< 4< 3< 3< 3;3; 3; 3; 2; 2;2:19/7)1%-)1-6.6.6-5+3)1'/&-&$"###### $ $ $ $ $ $!$!%!%!$!%!%!$ $!$#"!%# # $ *0# )0!# #!$!$ $"  !" #!$ $   ## $ $ # #"! .5+2+2-4+1*1+2$ ,2+1*0/6 Release_v0.3/kernels/compiler_rotate.cl000066400000000000000000000002171223142177000203560ustar00rootroot00000000000000kernel void compiler_rotate(global int *src, global int *dst, global int *y) { int i = get_global_id(0); dst[i] = rotate(src[i], y[i]); } Release_v0.3/kernels/compiler_sampler.cl000066400000000000000000000016041223142177000205240ustar00rootroot00000000000000/* test OpenCL 1.1 sampler declaration */ __kernel void compiler_sampler () { #define S(A,B,C) CLK_NORMALIZED_COORDS_##A | CLK_ADDRESS_##B | CLK_FILTER_##C const sampler_t \ s0 = S(TRUE,REPEAT,NEAREST), s1 = S(TRUE,REPEAT,LINEAR), s2 = S(TRUE,CLAMP,NEAREST), s3 = S(TRUE,CLAMP,LINEAR), s4 = S(TRUE,NONE,NEAREST), s5 = S(TRUE,NONE,LINEAR), s6 = S(TRUE,CLAMP_TO_EDGE,NEAREST), s7 = S(TRUE,CLAMP_TO_EDGE,LINEAR), s8 = S(TRUE,MIRRORED_REPEAT,NEAREST), s9 = S(TRUE,MIRRORED_REPEAT,LINEAR), s10 = S(FALSE,REPEAT,NEAREST), s11 = S(FALSE,REPEAT,LINEAR), s12 = S(FALSE,CLAMP,NEAREST), s13 = S(FALSE,CLAMP,LINEAR), s14 = S(FALSE,NONE,NEAREST), s15 = S(FALSE,NONE,LINEAR), s16 = S(FALSE,CLAMP_TO_EDGE,NEAREST), s17 = S(FALSE,CLAMP_TO_EDGE,LINEAR), s18 = S(FALSE,MIRRORED_REPEAT,NEAREST), s19 = S(FALSE,MIRRORED_REPEAT,LINEAR); } Release_v0.3/kernels/compiler_saturate.cl000066400000000000000000000007551223142177000207170ustar00rootroot00000000000000#define 
TEST_TYPE(TYPE) \ __kernel void test_##TYPE(__global TYPE *C, __global TYPE *A, __global TYPE *B) { \ int id = get_global_id(0); \ C[id] = add_sat(A[id], B[id]); \ } TEST_TYPE(char) TEST_TYPE(uchar) TEST_TYPE(short) TEST_TYPE(ushort) TEST_TYPE(int) TEST_TYPE(uint) //TEST_TYPE(long) //TEST_TYPE(ulong) #undef TEST_TYPE Release_v0.3/kernels/compiler_saturate_sub.cl000066400000000000000000000007551223142177000215700ustar00rootroot00000000000000#define TEST_TYPE(TYPE) \ __kernel void test_##TYPE(__global TYPE *C, __global TYPE *A, __global TYPE *B) { \ int id = get_global_id(0); \ C[id] = sub_sat(A[id], B[id]); \ } TEST_TYPE(char) TEST_TYPE(uchar) TEST_TYPE(short) TEST_TYPE(ushort) TEST_TYPE(int) TEST_TYPE(uint) //TEST_TYPE(long) //TEST_TYPE(ulong) #undef TEST_TYPE Release_v0.3/kernels/compiler_shift_right.cl000066400000000000000000000002011223142177000213630ustar00rootroot00000000000000kernel void compiler_shift_right(global uint *src, global int *dst) { int i = get_global_id(0); dst[i] = src[i] >> 24; } Release_v0.3/kernels/compiler_short_scatter.cl000066400000000000000000000001721223142177000217440ustar00rootroot00000000000000__kernel void compiler_short_scatter(__global short *dst) { int id = (int) get_global_id(0); dst[id] = (short) id; } Release_v0.3/kernels/compiler_smoothstep.cl000066400000000000000000000003011223142177000212570ustar00rootroot00000000000000kernel void compiler_smoothstep(global float *src1, global float *src2, global float *src3, global float *dst) { int i = get_global_id(0); dst[i] = smoothstep(src1[i], src2[i], src3[i]); } Release_v0.3/kernels/compiler_step.cl000066400000000000000000000021661223142177000200400ustar00rootroot00000000000000#define COMPILER_STEP_FUNC_N(TYPE, N) \ kernel void compiler_step_##TYPE##N ( \ global TYPE##N* edge, global TYPE##N* x, global TYPE##N* dst) { \ int i = get_global_id(0); \ dst[i] = step(edge[i], x[i]); \ } kernel void compiler_step_float (global float* edge, global float* x, global float* dst) { int i = 
get_global_id(0); dst[i] = step(edge[i], x[i]); } COMPILER_STEP_FUNC_N(float, 2) COMPILER_STEP_FUNC_N(float, 3) COMPILER_STEP_FUNC_N(float, 4) COMPILER_STEP_FUNC_N(float, 8) COMPILER_STEP_FUNC_N(float, 16) #define COMPILER_STEPF_FUNC_N(TYPE, N) \ kernel void compiler_stepf_##TYPE##N ( \ float edge, global TYPE##N* x, global TYPE##N* dst) { \ int i = get_global_id(0); \ dst[i] = step(edge, x[i]); \ } kernel void compiler_stepf_float (float edge, global float* x, global float* dst) { int i = get_global_id(0); dst[i] = step(edge, x[i]); } COMPILER_STEPF_FUNC_N(float, 2) COMPILER_STEPF_FUNC_N(float, 3) COMPILER_STEPF_FUNC_N(float, 4) COMPILER_STEPF_FUNC_N(float, 8) COMPILER_STEPF_FUNC_N(float, 16) Release_v0.3/kernels/compiler_structure_attributes.cl000066400000000000000000000005541223142177000233720ustar00rootroot00000000000000#define X(x, y) x ## y #define NAME(x, y) X(x, y) #define S struct NAME(s, __LINE__) { \ char c; \ int i; \ float f; \ } S __attribute__((aligned(16))); S __attribute__((aligned)); S __attribute__((packed)); S __attribute__((endian(host))); S __attribute__((endian(device))); S __attribute__((endian)); __kernel void compiler_structure_attributes() { } Release_v0.3/kernels/compiler_switch.cl000066400000000000000000000012421223142177000203600ustar00rootroot00000000000000__kernel void compiler_switch(__global int *dst, __global int *src) { switch (get_global_id(0)) { case 0: dst[get_global_id(0)] = src[get_global_id(0) + 4]; break; case 1: dst[get_global_id(0)] = src[get_global_id(0) + 14]; break; case 2: dst[get_global_id(0)] = src[get_global_id(0) + 13]; break; case 6: dst[get_global_id(0)] = src[get_global_id(0) + 11]; break; case 7: dst[get_global_id(0)] = src[get_global_id(0) + 10]; break; case 10: dst[get_global_id(0)] = src[get_global_id(0) + 9]; break; case 12: dst[get_global_id(0)] = src[get_global_id(0) + 6]; break; default: dst[get_global_id(0)] = src[get_global_id(0) + 8]; break; } } 
Release_v0.3/kernels/compiler_type_casting.cl000066400000000000000000000007061223142177000215540ustar00rootroot00000000000000/* test OpenCL 1.1 Conversions & Type Casting Examples (section 6.2) */ __kernel void compiler_type_casting() { float f = 1.23456789f; float g; g = (float)f; g = convert_float(f); g = as_float(f); g = convert_float_rte(f); g = convert_float_rtz(f); g = convert_float_rtp(f); g = convert_float_rtn(f); g = convert_float_sat_rte(f); g = convert_float_sat_rtz(f); g = convert_float_sat_rtp(f); g = convert_float_sat_rtn(f); } Release_v0.3/kernels/compiler_uint16_copy.cl000066400000000000000000000002141223142177000212350ustar00rootroot00000000000000__kernel void compiler_uint16_copy(__global uint16 *src, __global uint16 *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } Release_v0.3/kernels/compiler_uint2_copy.cl000066400000000000000000000002101223142177000211440ustar00rootroot00000000000000__kernel void compiler_uint2_copy(__global uint2 *src, __global uint2 *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } Release_v0.3/kernels/compiler_uint3_copy.cl000066400000000000000000000002101223142177000211450ustar00rootroot00000000000000__kernel void compiler_uint3_copy(__global uint3 *src, __global uint3 *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } Release_v0.3/kernels/compiler_uint3_unaligned_copy.cl000066400000000000000000000003011223142177000231740ustar00rootroot00000000000000__kernel void compiler_uint3_unaligned_copy(__global uint *src, __global uint *dst) { const int id = (int)get_global_id(0); const uint3 from = vload3(id, src); vstore3(from, id, dst); } Release_v0.3/kernels/compiler_uint8_copy.cl000066400000000000000000000002101223142177000211520ustar00rootroot00000000000000__kernel void compiler_uint8_copy(__global uint8 *src, __global uint8 *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } 
Release_v0.3/kernels/compiler_unstructured_branch0.cl000066400000000000000000000004021223142177000232200ustar00rootroot00000000000000__kernel void compiler_unstructured_branch0(__global int *src, __global int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; if (dst[id] >= 0) goto label; do { dst[id] = 1; label: id += get_local_size(0); } while (id < 32); } Release_v0.3/kernels/compiler_unstructured_branch1.cl000066400000000000000000000004231223142177000232240ustar00rootroot00000000000000__kernel void compiler_unstructured_branch1(__global int *src, __global int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; if (dst[id] >= 0) goto label1; dst[id] = 1; if (src[id] <= 2) goto label2; label1: dst[id] -= 2; label2: dst[id] += 2; } Release_v0.3/kernels/compiler_unstructured_branch2.cl000066400000000000000000000005371223142177000232330ustar00rootroot00000000000000__kernel void compiler_unstructured_branch2(__global int *src, __global int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; if (dst[id] < 0) goto label1; dst[id] = 1; if (dst[id] > src[id]) goto label3; dst[id]++; if (src[id] <= 2) goto label2; label1: dst[id] -= 2; label2: dst[id] += 2; label3: dst[id] *= 3; } Release_v0.3/kernels/compiler_unstructured_branch3.cl000066400000000000000000000004401223142177000232250ustar00rootroot00000000000000__kernel void compiler_unstructured_branch3(__global int *src, __global int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; if (dst[id] >= 2) goto label1; dst[id] = 1; if (src[id] < 2) goto label2; dst[id]--; label1: dst[id] -= 2; label2: dst[id] += 2; } Release_v0.3/kernels/compiler_upsample_int.cl000066400000000000000000000002431223142177000215570ustar00rootroot00000000000000kernel void compiler_upsample_int(global short *src1, global ushort *src2, global int *dst) { int i = get_global_id(0); dst[i] = upsample(src1[i], src2[i]); } 
Release_v0.3/kernels/compiler_upsample_long.cl000066400000000000000000000002411223142177000217220ustar00rootroot00000000000000kernel void compiler_upsample_long(global int *src1, global uint *src2, global long *dst) { int i = get_global_id(0); dst[i] = upsample(src1[i], src2[i]); } Release_v0.3/kernels/compiler_vect_compare.cl000066400000000000000000000002501223142177000215240ustar00rootroot00000000000000__kernel void compiler_vect_compare(__global int4 *src, __global int4 *dst) { int4 test = (int4)(0,0,0,0); dst[get_global_id(0)] = test < src[get_global_id(0)]; } Release_v0.3/kernels/compiler_vector_inc.cl000066400000000000000000000004551223142177000212170ustar00rootroot00000000000000kernel void compiler_vector_inc(global char *dst, global char *src) { size_t i = get_global_id(0); char2 dst2 = vload2(i, dst); if (src[i] == 0) dst2++; else if(src[i] == 1) ++dst2; else if(src[i] == 2) dst2--; else --dst2; vstore2(dst2, i, dst); } Release_v0.3/kernels/compiler_vector_load_store.cl000066400000000000000000000023551223142177000226020ustar00rootroot00000000000000/* test OpenCL 1.1 Vector Data Load/Store Functions (section 6.11.7) */ #pragma OPENCL EXTENSION cl_khr_fp64 : enable #define OFFSET2(type) (type ##2) {(type)1, (type)2} #define OFFSET3(type) (type ##3) {(type)1, (type)2, (type)3} #define OFFSET4(type) (type ##4) {(type)1, (type)2, (type)3, (type)4} #define OFFSET8(type) (type ##8) {(type)1, (type)2, (type)3, (type)4, (type)5, (type)6, (type)7, (type)8} #define OFFSET16(type) (type ##16) {(type)1, (type)2, (type)3, (type)4, (type)5, (type)6, (type)7, (type)8, (type)9, (type)10, (type)11, (type)12, (type)13, (type)14, (type)15, (type)16} #define TEST_TYPE(type, n) \ __kernel void test_##type ##n(__global type *pin, \ __global type *pout) \ {\ int x = get_global_id(0); \ type ##n value; \ value = vload ##n(x, pin); \ value += OFFSET ##n(type); \ vstore ##n(value, x, pout); \ } #define TEST_ALL_TYPE(n) \ TEST_TYPE(char,n) \ TEST_TYPE(uchar,n) \ TEST_TYPE(short,n) \ 
TEST_TYPE(ushort,n)\ TEST_TYPE(int,n) \ TEST_TYPE(uint,n) \ TEST_TYPE(float,n) \ TEST_TYPE(double,n)\ TEST_TYPE(long,n) \ TEST_TYPE(ulong,n) #if 0 TEST_TYPE(half,n) #endif TEST_ALL_TYPE(2) TEST_ALL_TYPE(3) TEST_ALL_TYPE(4) TEST_ALL_TYPE(8) TEST_ALL_TYPE(16) Release_v0.3/kernels/compiler_volatile.cl000066400000000000000000000002551223142177000207010ustar00rootroot00000000000000__kernel void compiler_volatile(__global int *dst, __local volatile int *hop) { hop[get_global_id(0)] = get_local_id(1); dst[get_global_id(0)] = hop[get_local_id(0)]; } Release_v0.3/kernels/compiler_vote_all.cl000066400000000000000000000002731223142177000206670ustar00rootroot00000000000000__kernel void compiler_vote_all(__global uint *src, __global uint *dst) { int id = (int)get_global_id(0); if (__gen_ocl_all(id > 8)) dst[id] = src[id]; else dst[id] = 0; } Release_v0.3/kernels/compiler_vote_any.cl000066400000000000000000000002731223142177000207060ustar00rootroot00000000000000__kernel void compiler_vote_any(__global uint *src, __global uint *dst) { int id = (int)get_global_id(0); if (__gen_ocl_any(id > 6)) dst[id] = src[id]; else dst[id] = 0; } Release_v0.3/kernels/compiler_workitem_builtin.cl000066400000000000000000000004721223142177000224520ustar00rootroot00000000000000/* test case for OpenCL 1.1 work-item built-in functions */ __kernel void compiler_workitem_builtin() { uint x = get_work_dim(); size_t y = get_global_size(0); y = get_global_id(0); y = get_local_size(0); y = get_local_id(0); y = get_num_groups(0); y = get_group_id(0); y = get_global_offset(0); } Release_v0.3/kernels/compiler_write_only_bytes.cl000066400000000000000000000001651223142177000224630ustar00rootroot00000000000000__kernel void compiler_write_only_bytes(__global char *dst) { int id = (int)get_global_id(0); dst[id] = 2; } Release_v0.3/kernels/compiler_write_only_shorts.cl000066400000000000000000000001671223142177000226610ustar00rootroot00000000000000__kernel void compiler_write_only_shorts(__global short *dst) { int id 
= (int)get_global_id(0); dst[id] = 2; } Release_v0.3/kernels/empty.cl000066400000000000000000000000311223142177000163160ustar00rootroot00000000000000__kernel void empty() {} Release_v0.3/kernels/lenna128x128.bmp000066400000000000000000001400661223142177000173300ustar00rootroot00000000000000BM66(  ZZZZZZZZZ|||fffMMMbbbNNN(((333:::...111333666:::111222GGGAAA,,,111:::EEEAAA***BBB[[[iii111111(((222)))""")))===MMMfffnnnwwwlll)))MMMwww|||ZZZbbbjjjfffbbbZZZRRRIII===((((((Ťnnn===>>>MMMAAAIIIjjjjjjRRRIIIMMMZZZ|||fffRRRIII111111AAA===fffRRRZZZZZZZZZ222333(((------333***111---EEE:::666111---$$$===***,,,(((bbbbbb(((666+++)))..."""(((...MMMYYYjjjnnnzzzRRR(((ddd|||ֻZZZjjjbbbfffZZZMMMIII666...$$$...ϼױddd777CCCMMMIIIMMMbbbjjjIIIMMMjjjjjjdddZZZ111===(((""""""fffMMMMMMMMMjjjLLL111(((***111666---GGG,,,AAAEEEAAA111:::---...%%%[[[zzziiiNNNddd"""111===111111""")))(((111YYYfffrrr|||yyy777111lllMMMZZZbbbZZZZZZZZZMMMAAA===111===ZZZvvvŐRRRCCCMMMZZZMMMZZZjjjZZZZZZMMMZZZoooZZZIII666$$$,,,ddd===ZZZ|||ddd\\\(((EEE...(((***666AAA000:::GGG333,,,333111***zzzzzz111GGGEEEAAA:::RRR+++111,,,......"""111CCCMMMrrryyywww(((ZZZ|||MMMMMMIIIMMMRRRRRRRRRRRRZZZffffffϱyyyYYYMMMZZZZZZMMMfffjjjZZZZZZZZZjjjZZZIII111$$$"""(((ddd===RRRuuu\\\LLL(((111333111111::::::333:::SSS(((:::---((("""EEEBBB:::SSS,,,GGG(((bbbddd(((111...)))333$$$111IIIMMMnnnnnnyyy|||IIIIIIuuuZZZ===AAAMMMZZZZZZjjj|||~~~www׼fffZZZMMM]]]MMMMMMjjjZZZjjjZZZfffZZZRRR>>>$$$"""~~~===\\\666AAA******333******(((::::::000AAAAAA666,,,(((bbb%%%GGGzzzGGG:::,,,:::111:::+++...III111111"""+++777YYYnnnyyyyyyrrr111ZZZ֮ZZZZZZfffrrr~~~ŤwwwfffYYYYYYfffMMMZZZjjjjjjZZZZZZfffZZZRRR(((((((((~~~111\\\===333,,,(((:::SSS(((000AAA:::111:::000111(((:::SSS::::::AAASSSbbb[[[BBBzzz(((===333===999111...111MMMnnnyyy}}}}}}>>>bbbuuufffrrrűnnnnnnfffMMMgggZZZMMM^^^fffbbbZZZZZZfffZZZ(((111~~~...\\\222---******---111EEE333111::::::---000EEE(((333[[[bbb:::GGGAAA[[[[[[333111===+++999+++111111+++IIIfffzzzyyyzzzZZZMMMlllyyyCCC|||ֶnnnlll]]]YYYfffMMMMMMbbb^^^ZZZjjjZZZZZZ""""""...~~~+++\\\---11
1222111333***111666:::EEE666AAA111EEE(((%%%((([[[%%%[[[[[[zzzzzzbbb***zzz(((===333SSSbbbAAA111...IIIMMMyyy|||fffIIIfffü111~~~|||yyyuuufffZZZdddfffZZZZZZgggbbbfff\\\ZZZ""""""EEE|||$$$\\\666***111(((......GGG111333SSS:::111(((:::***---SSSEEE((((((BBBzzzzzz(((\\\RRRrrrdddfff111+++111111MMMjjjyyyyyyyyy111jjjzzzuuu$$$|||xxxכwwwllllll]]]ffflll\\\ZZZZZZffffffZZZZZZ"""SSSfff$$$]]]uuu999BBB(((***(((111:::SSS(((\\\111111SSS111111***SSS(((---AAAzzzGGG[[["""666===III===:::+++111111IIIfffyyyuuurrr+++ZZZyyyyyy"""uuuŃffffffffffffnnnbbbZZZ]]]jjjjjjZZZZZZ""""""LLLfff$$$fff---333AAA((((((...(((---~~~%%%,,,:::((("""---333AAA111zzzGGG[[[bbb333MMM666III666111111666===MMMuuuuuu111jjjyyy"""~~~uuurrrŤfffYYYMMMYYYllllllbbbZZZffffffdddfff""""""SSSRRR$$$fffGGG999---...((((((AAA[[[111111---222------111GGGbbbzzzAAABBB999ZZZEEE111222111666333111777nnnnnnyyy+++ZZZnnnϻ""""""~~~{{{ϱfff]]]]]]fff{{{{{{bbbMMMZZZjjjjjjjjj(((***\\\\\\(((lll,,,(((:::---333111:::000(((GGG$$$EEEAAA"""000GGGbbbbbbEEEGGG000,,,,,,MMM111333:::111:::(((...777]]]YYYrrr111jjjnnn111""""""~~~uuuxxxρ]]]ZZZZZZlllllllllZZZZZZfffjjjmmm$$$(((zzzfff"""nnnnnnLLL(((:::AAA(((111***---~~~***NNN"""111EEEGGG---EEEBBBbbb000[[[zzz333999===,,,======...333...111)))333111MMMfff\\\111MMMyyy|||yyy+++)))...|||~~~xxx϶___YYYfffbbbllllllbbbMMMZZZjjjZZZ(((EEE~~~ZZZ)))nnnMMM"""(((((((((333111***000ddd***:::(((EEE000iiiEEEGGGdddBBBbbb[[[***999EEE(((111===111666666AAA...:::111777rrrnnn111MMMuuu|||===)))$$$111ggguuuxxxmmmMMMZZZZZZbbbbbblll^^^ZZZbbbjjj222\\\RRR111gggLLL(((((((((---SSSAAA:::000\\\222EEE~~~000[[[bbbAAAGGGBBBbbbzzzzzz===RRR,,,,,,:::===666666333:::333333999AAAnnnlll+++ZZZrrrzzz|||...111111>>>MMM~~~rrrΖIIIMMMZZZjjjbbblllxxxZZZ^^^bbbfff|||III111nnnRRR%%%,,,333...EEE:::666222GGGGGG111---666---iii:::999GGGzzzGGGdddiii~~~[[[111(((666AAA666111...:::111222111CCCfffddd+++MMMrrrnnn|||...+++111>>>MMM===uuuuuu~~~uuuwwwvvvαvvv777bbbZZZbbbbbbxxxfffZZZZZZ~~~~~~fff===111vvv\\\::::::...)))333:::333AAA:::999bbbddd((([[[000AAAAAA[[[[[[iii(((bbb...333666:::666666333
666:::---======ZZZAAA...MMMkkkyyyuuuLLLRRRIII===ZZZ:::999uuu|||rrrwww\\\ő(((IIIfffZZZMMMbbbfffZZZZZZ|||ZZZRRR111===nnnlllZZZ,,,,,,---iiiiiiuuuEEE[[[(((~~~000zzzGGGSSS[[[iiiiiizzz666|||000fff666AAA000AAA666666333:::AAA666111===IIIMMM111111ZZZvvvyyyuuuzzzGGG...ZZZIII777MMMAAALLLuuuuuuZZZZZZvvvMMMλZZZ$$$MMMZZZIIIMMMZZZZZZMMM~~~ZZZLLLCCC111>>>nnnxxx000111(((---EEE%%%(((EEEddd(((EEEEEEBBBAAAzzzGGGdddddd[[[666III***999666666666333111111333:::===MMM111===ZZZnnnnnnyyyzzz\\\===jjjIII>>>MMMIII===uuurrrfffZZZZZZxxxxxxIIIμMMM111IIIIIIRRRZZZfffMMM~~~uuuZZZIII111===111AAAnnnǖfffIII===((((((111NNN$$$---rrrGGG[[[bbbAAA[[[AAA[[[[[[zzz\\\|||---===ZZZ111999+++666222===111222...===666======111ZZZuuuzzzyyyzzzzzzϴ999,,,"""RRRjjjIIIMMMMMMAAALLLuuuuuuuuujjjfffbbbZZZfffZZZjjjfffZZZϡAAA333===IIIIIIZZZZZZIII===:::666--->>>AAAZZZ===999000(((fff222EEE999bbb:::---bbbiii[[[bbbSSSiiibbbiiibbb|||GGG000===ZZZ333AAA:::AAA111666+++333111:::666===666MMMYYYnnnzzz|||ƴAAA---AAA,,,333ZZZuuuIIIIIIZZZCCCLLLxxxxxxmmmrrrfffjjjfffjjjuuuuuu~~~|||jjjλfff+++111===MMM___666666...(((***AAAMMM]]]ooo~~~GGG333(((NNN---GGG---111111BBBiiiEEE---AAAiii[[[(((iii,,,---111MMM...333...111222...222666:::======666111IIIMMMeeeơ:::111:::111...---...AAAjjjZZZIIIIIICCCLLLlll{{{lllmmmuuummmuuuuuu~~~~~~|||ϻRRR((((((111MMM111...(((...$$$MMMZZZfff]]]rrrCCC:::[[[666666AAA111---,,,GGGBBBBBB999zzz,,,,,,...333III333333666666999(((111666666=========666MMMCCCZZZfffooozzzAAA:::AAA:::---666:::***...>>>uuuRRRIIIZZZAAA>>>uuuuuurrrxxxuuu~~~~~~fffļ===)))(((111...---333***111MMMbbbnnnlll===jjjddd333AAAGGG:::GGGSSS---000,,,zzz999GGG[[[[[[iii~~~,,,000111666666CCC...666333111666111(((666===666AAA666>>>MMM111+++CCCCCC777YYYxxxuuu...:::,,,:::666:::333666111>>>fffIIIIIIMMM111777xxx~~~|||~~~ŻAAA222............===MMMbbbuuujjjAAAMMMooo---000zzz(((EEE:::111---bbbzzz000AAAiiiiiirrrZZZ:::666(((:::333...===222AAA666666666111666777>>>===111666>>>111+++CCCRRRjjjyyy+++>>>111:::333===666666666>>>jjjMMMIIIMMM===111~~~||||||ü...+++)))222111fffbbbuuunnnCCCZZZdddNNNSSSd
dd111333EEE***999GGGiiiBBBzzzzzziiizzzbbboooddd:::(((333333---===666...222111...333111===666===III111MMM777CCC\\\\\\rrryyyAAA111111:::333AAA======111777jjjMMMCCCAAA666111fff111)))111222666bbbgggfffCCCNNNNNNBBBZZZ:::---AAA111999AAA[[[bbbzzzbbbGGGqqqdddbbb::::::333333---===111---111111...666666AAA111AAA>>>111CCCMMM\\\lllwwwyyy|||...111,,,***666AAA111666111bbbIIIIIIMMM======||||||AAA666666111+++jjjjjjuuunnn777ffffffNNNEEEEEE666111:::666000iiiiii[[[iii000rrr,,,***......===111333666333666777777===...===III===RRR\\\bbbrrruuu|||$$$...(((111666666222111+++bbbMMMCCC>>>RRR111|||uuu:::666111111===MMMZZZxxxfffCCC^^^rrreee999:::zzz333GGG(((bbbSSSbbbbbbGGGbbbrrrrrrЫ---333:::111...+++111...777111AAA===666111===MMMYYYlllrrryyyyyy|||yyyuuuyyyqqqqqq{{{===(((...+++666)))111+++bbbAAAMMMCCC===333~~~===666===AAA777CCCZZZmmmlllCCCLLL^^^ZZZ999***,,,iiiiiibbbSSSBBBzzzyyylll::::::666666...666666111===III111LLLMMM===MMMjjjjjjrrryyyyyy|||zzzbbb******===666:::>>>+++fffMMMMMM111111---~~~666======111IIICCCZZZuuuCCCMMMfffoooZZZNNN...000bbb999BBB[[[[[[bbb[[[:::[[[fffnnn111(((111111(((===...111>>>III111===ZZZ===\\\jjjyyywwwyyyyyyyyyyyy(((---AAA..."""III+++uuuYYYIII======---666======AAAAAAAAA\\\oooeeeMMM]]]666999(((:::AAA999000EEEbbbbbbbbb[[[bbbSSS]]]bbb(((333...)))===111111>>>III...MMMMMMMMMbbbwwwyyyyyyqqqqqqMMM***222...---AAA111ZZZMMMIII===111(((uuu777===777MMM777IIIMMMjjjIIITTTuuu999bbb666666EEEBBBBBBiiibbb[[[BBB[[[---bbbrrrԴgggjjj...AAA+++===666111111MMM$$$MMMMMMCCCrrryyyyyyMMMCCC\\\jjjjjj\\\\\\\\\jjjyyy\\\bbbnnn((((((:::...666LLLLLLIIIIII===111***III>>>CCCIIICCCAAAZZZlllMMMTTTooo]]]EEE(((~~~AAABBB000AAAdddGGG999[[[999dddjjjxxxZZZ111%%%===111"""111AAAIII+++ZZZjjjRRRrrryyyyyyuuuMMM***333333333===>>>MMMMMM>>>$$$...CCCCCCCCCCCC>>>IIIMMMlllCCCMMMfff999NNNdddAAA:::999999AAASSSGGG[[[000zzz$$$EEEjjjZZZ^^^(((===111)))111III>>>"""fffjjj\\\jjjyyyyyyǼ(((:::111111:::IIIAAA===III+++...rrrCCCCCCMMMCCCCCCCCCMMMrrrCCCYYY]]]xxx===999rrr$$$[[[BBBGGGbbbGGG[[[AAAiii~~~:::111111jjjfff^^^mmm(((===111+++III)))$$$fffrrr
\\\rrryyyżƩ111------111===IIIMMM777111))):::jjj{{{MMMMMMMMMMMMAAA777MMMuuulllMMMZZZ]]]eeeNNNAAA999bbbAAA---EEE000[[[[[[iii[[[dddNNNNNN===(((dddMMM999...666AAAGGG::::::ddd^^^gggyyyyyy|||nnn111,,,111:::===AAAIII===+++MMMZZZjjjMMMMMMRRRMMMCCCAAAZZZuuujjjMMM\\\```kkkuuuNNN***SSS999:::EEE---bbbbbbiii"""+++777666LLLjjjrrr]]]uuu((((((111AAA111"""uuuMMMrrryyyyyyyyyyyydzzzz111111111222===IIIAAA111111uuuZZZ===MMMMMMYYYMMMCCCCCCMMMnnnrrrIIIMMM]]]eeezzzZZZ999[[[EEE(((zzz[[[[[[SSSGGGzzzNNNzzz,,,$$$111bbb(((EEEfffjjjlllZZZ(((333AAA+++:::uuuMMMjjjwwwyyyyyyqqqlll***333,,,777MMMIII+++(((jjj(((MMMZZZMMMYYYAAA>>>111bbbuuuMMMMMM]]]gggrrr777777NNNoooAAA999SSSiiiBBBbbb---AAA"""ZZZ222(((---GGG...((($$$"""111"""$$$111$$$uuuRRR\\\wwwyyyyyyyyyyyy777\\\\\\yyyǥ}}}+++---(((111MMMMMM111$$$^^^===xxxZZZZZZMMMMMMCCC777+++RRRnnnrrr]]]eeeNNNAAAiiibbbEEE:::000[[[BBBAAA111666...***(((666+++AAAuuu""")))RRR$$$)))(((ZZZ777\\\rrryyy||||||yyyyyyyyyǮZZZ%%%%%%111IIIIII++++++LLLZZZüMMMMMMMMMMMM>>>777+++RRRlllCCCZZZrrroooooooooLLLNNN999GGGBBB000iii[[[%%%(((AAA"""111111LLLZZZZZZ"""(((EEE{{{ddd)))$$$$$$uuuMMMfffrrryyy|||yyyyyy|||ǖnnn,,,(((***IIIMMM"""111)))"""LLLMMMMMMLLLIII>>>111)))RRR|||\\\RRRIIIbbbrrr999dddbbbiiiGGGGGGSSS[[[iiiiiiiii,,,...***%%%333EEECCCZZZMMM...666TTT,,,)))$$$fffCCC\\\rrr|||yyyyyy|||㻻CCC(((111IIICCC"""CCC333(((LLLżMMMMMMZZZLLLIII>>>111RRRfffMMMTTTrrr999ZZZ\\\BBB[[[[[[GGGbbbGGGAAA666AAA""""""***~~~CCCLLL(((""""""xxx""""""RRRCCCfffrrryyy|||yyyyyy㞞|||\\\***111===MMM"""III$$$111111xxx|||MMMIIICCCLLL>>>777111YYYfffCCCZZZeeefff999000999\\\000BBB999ddd000---EEE(((111***AAA>>>)))^^^ZZZuuulllZZZMMMCCC]]]rrryyyyyyyyy|||㗗|||ggg111666===ZZZ"""ZZZ"""===111ZZZxxxϼżMMMAAAMMM777CCC===111RRRfffMMMMMM]]]fffnnnuuuGGG999rrrEEEddd---000zzzAAA(((AAA(((111AAA:::111$$$^^^ddd(((%%%%%%(((vvv{{{,,,EEE777fffyyy|||֩|||nnnCCC"""111ZZZ$$$uuu~~~"""AAA111===~~~uuużMMMMMMMMMMMMMMMAAA777\\\fffMMM]]]]]]ggguuuZZZGGGiii[[[222NNN[[[BBBdddBBBiii111***111333***222MMM(((NNN~~~(((***..."""ooo(((CCCjjjrrr|||||||||Рnnn777***111ZZZ$$$ZZZ
)))GGG666)))uuuZZZZZZMMMMMMMMMIII===YYYfffCCCYYYkkkzzzrrrrrrTTT---GGG999GGGNNNBBBiiiAAA333***111111BBB...fffEEE>>>***(((111---:::lll)))777fffyyy|||Ϯ|||}}}CCC111======(((111+++)))111...oooŻϼYYYYYYMMMZZZMMMCCCAAA\\\lllRRRYYY\\\jjjkkkrrrGGG:::EEEzzzzzzNNNiiiGGG***333---iiiiiiGGGBBB\\\LLL%%%******(((lll111RRRrrr|||yyy|||ϮyyyMMM111777===$$$111$$$...111...ZZZû```YYYbbbMMMCCCAAA>>>YYYnnnMMM]]]fffrrrjjjnnnzzzooommm:::ddd333---[[[GGGdddBBBbbb000:::999AAAiii(((...222...333111(((333lllMMMrrryyy|||ǮkkkuuuCCC===111777(((666...666111)))666ü```fffdddZZZTTTMMMCCCffffffMMM]]]dddlllrrrlllNNNBBB\\\(((---(((999zzzEEEGGGAAAbbbGGG[[[SSSGGG...222)))666...(((lllddd\\\yyy|||㼼|||kkkggg```777111======---===666......:::+++ûffffff```\\\TTTMMMCCCffffffIII]]]llljjjnnnlll777rrrfffGGG***---999---333---[[[000ddd***...AAA.........((("""(((nnnYYYjjj|||uuuuuummmzzzyyyyyy㰰xxx>>>MMMZZZ777111===LLL---ZZZ666222"""AAA666uuuû```fff]]]fff\\\ZZZMMMbbbfffZZZ\\\fffjjjrrr\\\111)))ZZZ111SSS000ddd$$$((((((EEEbbbiiiBBB((()))666...666)))"""jjjdddzzzooouuuyyyyyyqqq֘rrrMMMAAA777111SSS999222uuuZZZAAA......999333LLL~~~```]]]\\\dddZZZMMMCCCffffffMMMZZZlllbbbfffffflll111+++EEE000((([[[666BBBEEEAAA[[[zzzzzz333999:::***==="""(((...$$$"""111{{{ggguuuzzzuuuCCCbbbuuu>>>Оuuu|||yyyuuuzzzZZZNNNrrrggg111777)))+++222222ZZZ\\\666666666:::)))...fff]]]]]]]]]]]]MMMIIIllldddCCCMMM\\\fffffffffgggbbb]]]111mmmvvv(((zzz***[[[GGG000[[[111***(((%%%zzz...(((111"""(((dddooo^^^gggMMM+++ZZZIIILLLCCCְCCC\\\jjj|||㥥fff\\\>>>LLLnnn++++++111666...,,,NNNLLL111111111666666333ooo~~~]]]bbb\\\]]]\\\CCCCCClllbbbIIIYYY]]]]]]ffffffbbb]]]\\\XXXıLLL***(((***zzziiiddd:::%%%(((,,,ddd---222111)))[[[((("""IIICCC)))===111***NNNCCCMMMnnnyyyMMM...***MMM)))===111111+++%%%ZZZAAA======111666666...\\\ffffffbbb]]]\\\]]]MMMnnnfffMMMYYY\\\ffflll\\\fff\\\\\\\\\΃jjjLLL$$$(((zzzGGG\\\GGGGGG[[[******---111$$$iiiAAA)))666(((ZZZfffƳ777777111777111+++===111)))777ǼYYY:::===AAA111===777111111(((222jjjNNN:::===)))666666666***]]]ffffffdddggg]]]RRRrrrfffIIITTT\\\fffgggfffffffff\\\fffkkkx
xxbbb\\\:::000bbb111bbbdddiii---111(((ddd111+++666+++"""ƴZZZbbbMMMRRR777fff111ZZZ\\\zzzuuuħxxxCCC777CCCCCC>>>***---+++fff...:::===......111666666rrrfffgggbbbbbbfffbbb\\\www```MMMZZZbbb]]]eeefffbbb\\\]]]\\\llljjjnnnIIIdddEEE111111GGGiii---bbb...666***666000ddd(((""""""333999"""EEE|||nnnzzzuuukkk\\\bbbZZZ^^^dzZZZooouuu\\\AAA***+++***ZZZ+++111:::111(((:::===:::dddfff```fffggg]]]]]]MMMnnn]]]CCCMMM]]]]]]mmmfffggg]]]fffgggnnnnnnrrrzzzIII(((ZZZuuu>>>999\\\---000\\\%%%999333EEE999(((+++222$$$---bbblll777AAAnnnļĝbbbZZZCCC111(((......222666LLLEEE===666+++===333NNNgggggg]]]ffffff```YYYyyy]]]CCCZZZ]]]fffggg]]]bbbgggfffggglllrrruuuuuuψ$$$+++lllZZZ999bbb(((zzz"""666999111"""(((666)))***EEE...666"""mmm¼CCCZZZmmmzzzǼĚCCC111%%%***\\\666EEE::::::===666222111111---``````jjj]]]ffffff\\\vvvfffCCCZZZ\\\fff``````ffflllfffnnnlllwwwrrr|||yyynnn{{{zzz777[[[$$$BBBGGG000GGGbbbGGG+++666)))***[[[---333000:::gggϫfff111***666rrrAAA999666:::===111333666111***ddduuuffffffgggffffff```\\\nnnfffMMMMMM]]]bbbggglllllllllgggnnnllluuunnnnnnnnnbbbYYYnnnrrrLLL,,,***SSS***"""$$$:::bbb666666------666---bbb"""lllΝ\\\222(((777666+++666666======)))111111(((SSSgggggg``````]]]]]]RRRyyyeeeTTTZZZ]]]gggfffbbblllffffffbbbnnnvvvrrr|||uuuuuunnn|||jjjzzzCCCBBBiii(((***333>>>---GGG666***\\\***SSSEEEGGGzzzļǏ\\\***(((ZZZZZZ(((333666666======111)))999666---llllllbbbfff]]]fffYYYnnnfffCCCYYY\\\gggffffffffffff\\\gggfffnnnnnnuuunnnuuueeerrrnnnvvvnnneeeTTT666AAA(((NNN***AAA666(((:::------[[[EEE[[["""(((̥ǼyyyIII"""$$$LLL^^^111666111===:::======666111999***dddbbbrrrlll``````bbb]]]wwwgggCCCIII]]]dddggg```fffbbb]]]fffgggrrrwwwlllggg|||wwwnnnuuueee|||IIINNNZZZ777222bbb***---AAA+++...666:::NNNAAA000(((...ϧrrr777***111===jjj:::111111111666EEE===)))...111(((666```]]]bbb```ffffffYYYnnnbbbCCCYYY\\\\\\```ffffffbbbffflllllllllvvvwwwrrrbbbrrrnnngggllluuuzzz^^^oooTTT---NNN666***666111222LLLAAAAAA***:::ddd+++$$$~~~kkkġ\\\+++---LLLCCC111111111666=========111)))++++++:::~~~lllggggggffffffbbb\\\nnn\\\MMMYYYZZZ```]]]\\\bbb\\\ffffffgggnnnkkknnnfffXXX
ϱ\\\uuummmooo|||uuu+++222LLL***,,,---\\\999EEEAAA---bbbddd...((((((~~~bbbzzzlj\\\"""(((NNNZZZAAA+++)))===777666===666......111---NNNrrrlllllllll```\\\fffRRRuuuYYYCCCMMMZZZ\\\]]]```fff```]]]nnnlllgggrrrvvvlllXXXuuuxxxxxxooouuuZZZZZZLLLAAA---999GGGEEE999GGG---rrr(((AAA......((((((oooNNNiiiǼijMMM---jjjNNN{{{++++++(((111666666III111666))))))...LLLuuuooollllllbbbfff```fff\\\}}}]]]AAAZZZ\\\\\\]]]YYY\\\dddfff]]]bbbrrrlllnnnfffTTTń}}}llluuuzzzjjjzzzoooNNNNNNNNNNNNNNN:::...999NNN999EEE+++333...333---:::AAAbbbrrrƼǮgggLLL~~~RRRlll$$$(((***...111======666666(((666)))AAAfff~~~llllllllllll]]]bbb\\\yyy\\\CCC\\\MMMYYYfff\\\]]]]]]fffbbbbbbllljjjnnn\\\\\\fff|||zzzoooooorrrrrrrrr===NNNGGGddd:::333---ddd***bbb666111GGG[[[999[[[[[[ǼZZZ>>>fffZZZrrr777...***+++666===111666222+++...666333LLLggglllxxxgggffffffbbb|||\\\CCCZZZYYYdddfffffffff\\\fff\\\fffnnnbbbfffRRRֳ}}}llloooooozzzfffoooZZZEEEGGGSSS111---zzz(((***ddd111999:::000zzzbbbuuueeeYYYLLLddd~~~rrrč"""111...111111111===...+++111666EEEuuufffggggggffflll]]]lll|||ZZZMMMMMM\\\]]]\\\\\\```bbb\\\]]]fffrrrfffZZZnnnş]]]}}}uuuooonnnmmmuuuooojjjLLLGGG999GGGAAA222EEE$$$999zzz666---EEEzzz---zzzZZZ...|||ZZZdddrrr"""+++...666111111666((()))===222ZZZllllllbbblllfffffflll|||ZZZCCCZZZ```fff]]]]]]ffffff\\\]]]\\\fff]]]TTTżfffuuuzzzuuuuuummm~~~^^^LLL999AAANNN---333***999iiiNNN---GGGddd[[[(((---~~~ûƛ333~~~ZZZfff((($$$+++666666:::...333111(((222llllllggggggfffbbbwww|||ZZZMMMMMM]]]]]]\\\]]]\\\\\\fff]]]ffffffYYYIIIĶuuueeexxxuuuuuuzzzZZZGGG999ddd333---666NNN~~~NNN---iiiGGG[[[[[[(((ƼʻRRRddd((((((...666111..."""111111333rrrlllfffbbbffffffjjj\\\]]]MMMMMM]]]bbb\\\\\\ffffff\\\fff]]]]]]ffflllŐgggmmmnnnrrrddd999[[[SSS666222bbbzzzddd===bbbLLLbbb(((%%%~~~~~~»»^^^bbbZZZbbb111...---666...(((:::AAA---EEEfffgggffffff\\\nnnwwwYYYCCCMMM\\\]]]ffffff\\\gggffffffbbb\\\RRRżlllvvvxxxeeezzzuuulllzzzuuurrrdddLLL222GGGiiibbbGGG333SSS111[[[ûЍZZZZZZMMM___bbbmmmƈ)))(((111111...333===333(((ggg\\\bbb]]]ffffffqqqMMMAAAMMM\\\]]]```ddd\\\fff```fffjjjfffMMMλrrr```uuu]]]zzzzzzzzzuuuzzzZZZooo^^
^rrrbbbzzzGGG000[[[bbb999222***%%%(((:::ƻЮ]]]MMMZZZMMMbbb[[[***111333333111666ZZZ```fffbbb]]]lllfffrrr\\\IIIZZZ```\\\]]]fff\\\bbbfffbbbfffRRRTTTŶnnnllluuufffuuuooofffZZZ===EEErrrbbb[[[BBBGGGEEE---***~~~ƟƻѠ]]]MMMZZZeee)))$$$333666...)))666***NNNbbbfff]]]bbb]]]bbbjjj\\\CCCMMMYYY]]]bbbfff\\\\\\\\\fffbbb\\\TTTϼuuuuuuzzzuuuggg{{{uuuZZZoooZZZfffbbbuuurrr:::֦MMMZZZuuu((("""666......111111666ddd]]]]]]]]]```bbb```www|||YYYCCCIII``````bbb]]]ffffff```\\\fff\\\MMMŶjjjbbbzzzzzzzzzLLLԐZZZ222%%%111:::---...666---LLLbbbgggYYY```bbb\\\lllYYYCCCZZZ\\\fff```bbbfff\\\ddd\\\fff]]]MMMϱzzzyyyeeeuuuzzzϼ(((:::222111000666...SSSuuu]]]dddYYYddd\\\YYY]]]||||||\\\CCCRRR]]]gggdddffffff]]]gggfffbbbfffTTTšfff]]]nnngggyyyН%%%......333***......333LLLfff]]]YYYbbbddd\\\fff}}}\\\MMMZZZ]]]eee```bbbbbb]]]fffffflllbbb\\\šwww```MMMrrruuuʥ´г{{{222(((AAA999++++++111...NNNooo]]]]]]dddffffff]]]bbbwww}}}\\\MMMZZZ]]]bbbffffffbbbbbbffffffffffff```۶jjjgggeeekkk|||```ѮϮ(((333...111333...111333:::fffddd```fffYYYbbblll}}}YYYIIIMMM]]]\\\fff\\\\\\\\\\\\fff\\\bbb\\\ѶyyywwwlllnnnfffRRRuuulllѧ̻̥111...***111333---+++666(((fffffffffgggfffggg\\\lll|||zzzRRRMMMYYY\\\\\\]]]]]]fff]]]dddbbbllllllbbbŗnnnggggggkkklllyyyvvvlll»̴(((***333...)))+++111===***LLL```]]]fff]]]ffffffbbbyyy\\\MMMZZZYYY]]]]]]ffffff\\\fffffflllnnnbbbşbbb]]]eeebbbgggnnnnnnkkkzzznnn´³Ѯ~~~111333666666222......+++>>>ZZZ```]]]fffbbbeee\\\nnnyyy]]]YYYMMMYYY```ffffffbbbbbbffffffnnnllllll```זlllggg\\\```llluuubbbuuullluuuuuuooo 
»̗Ҵ111:::666666)))++++++111111111```fff]]]ffffff```bbbzzz|||RRRYYYZZZ\\\]]]fffbbblllfff\\\lllnnnnnnnnn```֐yyy]]]\\\llllllrrrvvvxxxzzzzzz|||uuuuuueeeƻϼл,,,222...333666111+++111==="""===```\\\ffffffbbb]]]fffyyyYYYMMMZZZ]]]ffffffgggggggggfffffflllvvvnnnbbbהnnnZZZfff]]]uuunnnyyy```zzzuuu]]]»̠Ԑ^^^(((111666EEE666)))+++666:::...$$$]]]]]]]]]bbb]]]YYY]]]uuu]]]YYYMMMTTTffffffbbbffffffbbbggglllnnnyyyggg۫rrrffflllnnnZZZnnnuuunnnuuuzzzvvvlllм̻uuuddd...Ҵ(((111AAA:::)))+++111333666666(((LLLbbb\\\YYY]]]]]]\\\YYYwwwTTTCCCMMMZZZ```fffdddffffff]]]lllgggnnnlllfffrrr]]]lllnnnrrrnnnuuuzzzzzzuuueeeyyylll̮jjjyyynnn777:::Ѵ...333666111)))111""":::===666...dddZZZZZZ]]]]]]]]]YYY\\\|||ZZZIIIMMMZZZ\\\\\\bbbffffffggglllnnnrrrlllllllllńlllgggnnnlllrrrnnnuuunnnuuu}}}nnn㴴CCC|||nnnMMM..."""ԝ,,,***333666666111222666(((:::111...111bbbZZZZZZZZZYYYRRRZZZyyyZZZMMMMMMZZZ```]]]]]]]]]bbbfffgggllllllllljjjeeennn```gggnnnnnnnnnlllvvvnnnuuuxxx```̮ƻʻMMMZZZ|||uuuMMM...***""",,,Ү㡡***333111666:::666111AAA333111AAA(((ZZZ]]]ZZZYYYZZZZZZMMM\\\|||ZZZMMMMMMYYY]]]bbbllllll]]]bbbxxxlllgggvvvnnnnnnĖ```gggllllllnnnfffgggnnnzzzlllmmm111\\\ZZZ111...+++$$$"""CCC***(((***666666666EEE...666111666666...333dddZZZYYY]]]ZZZZZZMMM]]]RRRMMMMMM]]]]]]bbbggg```]]]```lllvvvnnn{{{uuullllllfffvvvlllgggnnnnnnuuuuuunnn{{{uuuuuuxxxjjjCCC]]]}}}ZZZ111111$$$+++CCCfff}}}...(((+++333===111666666666:::333666111$$$...ZZZYYY]]]YYY]]]TTTbbbnnnZZZMMMMMM```]]]fffbbb]]]]]]fffffflllnnnnnnuuuuuurrrlllllllllvvvvvvnnnlllffffff{{{zzz>>>YYYlllzzzMMM+++---)))AAAfffrrr((((((+++111111:::666+++333111...666111---EEEZZZZZZYYY\\\MMMZZZ]]]}}}zzzbbbCCCYYY\\\ffffffgggffffffdddfffgggrrrnnnyyyuuunnnzzzvvvvvvgggyyynnnvvvxxxnnnlllzzzuuuuuuIIIAAATTTggg|||uuuMMM222$$$)))ZZZ111$$$***...===111666111333111(((666111666666ddd\\\III\\\]]]TTTZZZ]]]yyy]]]MMMYYYbbbfffggg]]]]]]]]]lllllleeennnnnnuuuyyyyyynnnrrruuunnn{{{xxxnnnxxxuuu»ZZZCCCIIITTTnnnuuuTTT***$$$AAAnnn...(((""":::======666+++111===......666333:::ooowwwZZZZZZYYYTTTMMM]]]wwwYYYMMMMMMgggrrrfffgggffffff]]]ffflllrrrnnnyyy|||uuu}}
}}}}nnnnnnnnnnnnuuuuuunnnlll^^^ooommm{{{uuuØMMMMMMZZZZZZ]]]nnnuuuMMM$$$===^^^bbb(((***111666===:::111(((666111(((333::::::SSSrrrjjjMMMZZZYYY\\\]]]|||zzz]]]MMMZZZ\\\```lllfffffffffggggggfffvvvlllnnn}}}yyyrrruuuwwwggglllnnnnnneeelllxxxnnnuuuuuuuuuIIIMMMMMMbbbZZZ]]]lllMMM(((MMMnnn111SSSAAA(((666666666...(((666...***...999EEERRR|||zzzZZZMMM\\\MMMbbb|||TTTCCCMMM\\\]]]]]]ffflll```ggglllnnnlllyyy{{{nnn|||}}}yyy}}}yyyvvvlllnnnnnnllluuunnnuuuxxxuuuuuu}}}uuu㚚MMM\\\bbbbbb^^^```uuuuuuCCC777YYYuuunnn***...+++...999666666(((222111......AAAGGGEEE\\\rrrMMMYYYZZZ```|||yyyTTTIIIMMM```fff```bbblll]]]bbbggglllnnnnnnvvvuuuuuu|||yyyyyylllrrrrrrooonnnuuu{{{ZZZddd]]]bbbbbbbbb^^^uuu|||kkkLLLCCCuuuLLL((("""333333666111111(((111666***333333EEEEEEZZZ\\\ZZZYYY]]]YYYAAAMMM]]]]]]fffggg]]]ffffffgggllllllnnnuuuuuunnnuuuyyywwwnnnggglllxxxuuuuuuxxxxxxuuuƻMMMZZZbbbbbbggg```ffflllxxxnnnffflll111,,,***333+++...111222(((666111***333AAASSS(((\\\zzzMMMIII\\\||||||YYYCCCMMM]]]ggg]]]]]]```bbbfffvvvlllnnnnnnrrr}}}uuuyyynnnyyyuuuwwwnnnfffrrruuuzzzuuu»jjjfffllllllggggggnnnggg```nnnuuuuuu...***...(((666666666+++...333666...333EEE(((uuudddMMMfffyyyZZZCCCZZZ\\\gggjjjbbb]]]ddd]]]ffflllnnnvvvvvvyyy||||||yyy|||zzzyyyfffvvvrrrvvvnnn{{{{{{lll»ơZZZ]]]gggxxxlllllllllvvveee```uuuMMM111******+++222111+++)))333666:::...333AAA(((yyyMMMYYY|||MMMCCCZZZ]]]]]]bbb]]]]]]lll```gggnnnnnnrrrvvv|||nnn}}}}}}}}}|||yyynnnzzzzzzxxxó»Ƌlllllllllvvvkkkrrrnnnxxxlllbbbfffuuu\\\(((...(((333)))333666111---666******333666fffRRR|||\\\CCCMMMRRR]]]\\\]]]ddd```fffffflllnnnuuuuuurrr|||||||||}}}yyy|||jjjzzz{{{nnn{{{oooooo´lllnnnnnnnnnnnnuuuyyyyyyvvvuuufffbbbffflllMMM((((((***111111111111(((333111(((...111111zzzbbb|||YYYCCCZZZYYY]]]ffffff\\\bbbbbbgggnnnyyy|||nnnyyy}}}uuu|||yyyyyyuuuơllluuuvvvuuuuuuuuuuuuuuurrrnnnnnnvvvffflllnnn((((((***(((666666666111...333666***:::111www|||ZZZYYY\\\\\\```fffffffff\\\llllllvvvnnnnnnuuuyyy}}}nnn}}}|||zzzyyyyyyuuuoooÇlllzzzzzzuuu|||uuuuuunnnnnnggglllllllllnnn,,,***111333666666666:::666...AAA...(((111zzzyyy\\\MMM\\\fffbbbg
ggfffdddffffffnnnuuurrrwwwuuuyyyyyy||||||}}}||||||yyy|||yyy||||||nnnrrruuuuuu|||uuu}}}zzz}}}uuu|||uuu{{{uuulllnnn{{{הfff111...(((......666)))111333333...111111|||YYYZZZ``````fffffffffffffffllljjjnnnnnnnnnyyy|||yyy}}}yyy|||||||||uuuyyy}}}|||{{{nnnyyyuuu|||}}}|||zzz}}}zzzuuuuuunnnnnnllluuuۻffffff...(((...***333666666333...(((...111|||\\\\\\\\\fffgggfffffflllffffffnnnnnnnnnnnnyyyyyy|||uuuyyyzzz}}}|||}}}zzz|||||||||}}}zzzzzzvvv|||yyy|||||||||uuuzzzyyy}}}zzzzzzuuu{{{uuuuuunnnkkkuuulllfffooo```111******111666666666AAA...***111|||\\\ZZZ]]]]]]fffnnnfffffffffllllllfffnnnyyy|||uuu|||}}}}}}}}}|||||||||uuu||||||||||||}}}||||||}}}yyy|||nnnnnnyyyyyy}}}uuu}}}}}}zzzuuuzzz}}}|||}}}}}}uuuyyyvvvnnnmmmlllnnnzzzۻZZZlllggguuu]]]222***111:::666666222333)))666|||YYYMMM``````fffffffffffffff]]]bbbvvvnnn|||uuuuuuyyy|||}}}uuuyyy}}}}}}zzz|||yyyyyyuuu|||nnn|||yyyyyyuuu}}}}}}|||uuuzzz}}}uuuzzzuuuzzzzzzyyyuuuooolllnnnrrrmmmlll{{{xxxZZZ)))111---AAAAAA666111222)))|||\\\ZZZ```bbbfffgggffflllffffffgggllluuu|||}}}|||}}}yyy|||}}}yyyyyy|||zzz|||uuuuuuyyyuuu|||||||||yyyyyyyyy|||}}}yyy|||yyy|||}}}zzzuuuyyyuuuuuummmnnnۨffflllxxxuuu]]]+++(((111111111666111666|||dddZZZ```ffflllllllllfffgggffffffnnnnnnuuu|||||||||||||||}}}|||}}}yyywwwwww|||}}}}}}zzz|||}}}uuuuuu|||yyy}}}|||uuuyyyuuuuuunnnmmmmmmۻggg```xxx{{{yyynnnuuuYYY+++333---333333666111www]]]ZZZ]]]ffffffggglllbbbffflllfffvvvnnnuuuyyy}}}nnn}}}}}}}}}}}}zzz|||zzz}}}||||||||||||yyyyyyuuu}}}|||yyyzzzzzz|||uuu}}}uuuxxxlllxxx֔llllllnnnuuu{{{{{{uuuuuuCCC(((***......111111zzzbbbZZZdddffflllfffffflllffffffnnnlllvvvuuuuuu||||||}}}|||uuu}}}zzz||||||||||||www}}}}}}|||}}}||||||zzz||||||nnnuuuyyy}}}}}}{{{uuulllllluuuuuuۼfffmmmxxx{{{nnn{{{nnn{{{zzzMMM***111......:::fffZZZ]]]fffffffffbbbgggffffffnnnnnnuuuyyyyyy|||}}}||||||zzz||||||zzz}}}|||||||||}}}zzzzzz{{{uuufffllllllτjjjuuu{{{llluuu{{{nnnvvv{{{{{{MMM(((::::::...```]]]]]]gggnnnlll]]]llllll]]]fffnnnnnn|||nnn}}}}}}uuu}}}|||}}}||||||||||||}}}}}}nnnnnnuuullljjjlll۪jjjuuu{{{nnnuuu}}}xxxnnnuuuuuuMMM...***...yyyfffbbb]]]fffgggggg
uuufffffflllnnn{{{uuuuuuuuu||||||}}}zzzyyy|||zzz}}}}}}}}}zzzuuuuuuuuunnnlllbbbjjjggguuuuuunnnuuuuuuuuuuuuuuu}}}uuuIII...(((yyyXXXMMMbbb]]]lllffflllgggfffffflllnnnuuu|||zzz}}}}}}}}}|||}}}}}}}}}}}}}}}}}}|||uuuzzzlllxxxnnnggglll֮fffmmmlllnnnllluuuuuu{{{|||uuuyyy{{{uuuMMM222}}}fffZZZbbbjjjgggbbbllllllmmmggglllyyyyyyuuu}}}|||}}}}}}vvvzzzuuu}}}uuuuuummmlllfffuuulllfffllluuuuuuxxx{{{xxxnnnuuuzzzuuuuuuuuuuuunnn```ZZZfffllllll```lllllllllggglllnnnnnnuuu}}}|||}}}}}}uuuuuu}}}uuuuuuxxxlllfffuuuۻfffbbblllxxx{{{{{{{{{uuu{{{{{{uuuuuuuuuuuuuuunnn{{{Release_v0.3/kernels/my_test.cl000066400000000000000000000016061223142177000166550ustar00rootroot00000000000000__kernel void my_test(__global int2 *src, __global int *offsets, __global uint2 *dst, int w) { int i, index, j; uint2 out; unsigned int a, b, c, d; int2 rle; int gid = get_global_id(0); index = offsets[gid]; int i0 = 0; rle = src[index]; for (i = 0; i < w; i++, i0 += 8) { if (i0+0 >= rle.x) { index++; rle = src[index]; } a = rle.y; if (i0+1 >= rle.x) { index++; rle = src[index]; } b = rle.y; if (i0+2 >= rle.x) { index++; rle = src[index]; } c = rle.y; if (i0+3 >= rle.x) { index++; rle = src[index]; } d = rle.y; out.x = (d<<24)|(c<<16)|(b<<8)|(a); if (i0+4 >= rle.x) { index++; rle = src[index]; } a = rle.y; if (i0+5 >= rle.x) { index++; rle = src[index]; } b = rle.y; if (i0+6 >= rle.x) { index++; rle = src[index]; } c = rle.y; if (i0+7 >= rle.x) { index++; rle = src[index]; } d = rle.y; out.y = (d<<24)|(c<<16)|(b<<8)|(a); dst[gid*w + i] = out; } } Release_v0.3/kernels/null_kernel_arg.cl000066400000000000000000000003751223142177000203360ustar00rootroot00000000000000__kernel void null_kernel_arg(__global unsigned int *dst, __global unsigned int * mask_global, __constant unsigned int* mask_const) { if(dst && mask_global==0 && mask_const == NULL) { uint idx = (uint)get_global_id(0); dst[idx] = idx; } } Release_v0.3/kernels/test_cl_finish.cl000066400000000000000000000003201223142177000201560ustar00rootroot00000000000000 __kernel void 
test_cl_finish(__global int *src, __global int *dst, int n, int num_threads) { int tid, pos; tid = get_global_id(0); for (pos=tid; pos < n; pos+=num_threads) { dst[pos] = src[pos]; } } Release_v0.3/kernels/test_copy_buffer.cl000066400000000000000000000002041223142177000205240ustar00rootroot00000000000000__kernel void test_copy_buffer(__global float* src, __global float* dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } Release_v0.3/kernels/test_copy_buffer_row.cl000066400000000000000000000003411223142177000214150ustar00rootroot00000000000000__kernel void test_copy_buffer_row(__global int *src, __global int *dst, __global int *data) { int row = data[0]; int size = data[1]; int id = (int) get_global_id(0); for (; id < size; id += row) dst[id] = src[id]; } Release_v0.3/kernels/test_copy_image.cl000066400000000000000000000004351223142177000203430ustar00rootroot00000000000000__kernel void test_copy_image(__read_only image2d_t src, __write_only image2d_t dst, sampler_t sampler) { int2 coord; int4 color; coord.x = (int)get_global_id(0); coord.y = (int)get_global_id(1); color = read_imagei(src, sampler, coord); write_imagei(dst, coord, color); } Release_v0.3/kernels/test_copy_image1.cl000066400000000000000000000022771223142177000204320ustar00rootroot00000000000000#define S(A,B,C) CLK_NORMALIZED_COORDS_##A | CLK_ADDRESS_##B | CLK_FILTER_##C #define COPY_IMAGE(_dst, _sampler, scoord, dcoord) \ color = read_imagei(src, _sampler, scoord);\ write_imagei(_dst, dcoord, color) __kernel void test_copy_image1(__read_only image2d_t src, __write_only image2d_t dst0, sampler_t sampler0, __write_only image2d_t dst1, __write_only image2d_t dst2, __write_only image2d_t dst3, __write_only image2d_t dst4, float w_inv, float h_inv) { const sampler_t sampler1 = S(FALSE, REPEAT, NEAREST); const sampler_t sampler2 = S(FALSE, CLAMP, NEAREST); const sampler_t sampler3 = S(FALSE, MIRRORED_REPEAT, NEAREST); const sampler_t sampler4 = S(TRUE, REPEAT, NEAREST); int2 coord; float2 fcoord; 
int4 color; coord.x = (int)get_global_id(0); coord.y = (int)get_global_id(1); fcoord.x = coord.x * w_inv; fcoord.y = coord.y * h_inv; COPY_IMAGE(dst0, sampler0, coord, coord); COPY_IMAGE(dst1, sampler1, coord, coord); COPY_IMAGE(dst2, sampler2, coord, coord); COPY_IMAGE(dst3, sampler3, coord, coord); COPY_IMAGE(dst4, sampler4, fcoord, coord); } Release_v0.3/kernels/test_copy_image_3d.cl000066400000000000000000000015411223142177000207300ustar00rootroot00000000000000__kernel void test_copy_image_3d(__read_only image3d_t src, __write_only image3d_t dst, sampler_t sampler, __write_only image2d_t buf0, __write_only image2d_t buf1, __write_only image2d_t buf2, __write_only image2d_t buf3) { int4 coord; int2 coord2; float4 color; coord.x = (int)get_global_id(0); coord.y = (int)get_global_id(1); coord.z = (int)get_global_id(2); coord2.x = coord.x; coord2.y = coord.y; color = read_imagef(src, sampler, coord); write_imagef(dst, coord, color); if (coord.z == 0) write_imagef(buf0, coord2, color); else if (coord.z == 1) write_imagef(buf1, coord2, color); else if (coord.z == 2) write_imagef(buf2, coord2, color); else if (coord.z == 3) write_imagef(buf3, coord2, color); } Release_v0.3/kernels/test_fill_gl_image.cl000066400000000000000000000005551223142177000210040ustar00rootroot00000000000000__kernel void test_fill_gl_image(image2d_t img, int color) { int2 coord; float4 color_v4; coord.x = get_global_id(0); coord.y = get_global_id(1); color_v4 = (float4){((color >> 24) & 0xFF), (color >> 16) & 0xFF, (color >> 8) & 0xFF, color & 0xFF}; color_v4 = color_v4 / 255.0f; write_imagef(img, coord, color_v4); } Release_v0.3/kernels/test_fill_image.cl000066400000000000000000000005341223142177000203170ustar00rootroot00000000000000__kernel void test_fill_image(__write_only image2d_t dst, uint color) { int2 coord; int4 color4; color4.s0 = (color >> 24) & 0xFF; color4.s1 = (color >> 16) & 0xFF; color4.s2 = (color >> 8) & 0xFF; color4.s3 = color & 0xFF; coord.x = (int)get_global_id(0); coord.y = 
(int)get_global_id(1); write_imagei(dst, coord, color4); } Release_v0.3/kernels/test_fill_image0.cl000066400000000000000000000004331223142177000203750ustar00rootroot00000000000000__kernel void test_fill_image0(__write_only image2d_t dst) { int2 coord; coord.x = (int)get_global_id(0); coord.y = (int)get_global_id(1); int4 color4 = {coord.y & 0xFF, (coord.y & 0xFF00) >> 8, coord.x & 0xFF, (coord.x & 0xFF00) >> 8}; write_imagei(dst, coord, color4); } Release_v0.3/kernels/test_fill_image_3d.cl000066400000000000000000000006021223142177000207010ustar00rootroot00000000000000__kernel void test_fill_image_3d(__write_only image3d_t dst, uint color) { int4 coord; int4 color4; color4.s0 = (color >> 24) & 0xFF; color4.s1 = (color >> 16) & 0xFF; color4.s2 = (color >> 8) & 0xFF; color4.s3 = color & 0xFF; coord.x = (int)get_global_id(0); coord.y = (int)get_global_id(1); coord.z = (int)get_global_id(2); write_imagei(dst, coord, color4); } Release_v0.3/kernels/test_fill_image_3d_2.cl000066400000000000000000000004101223142177000211170ustar00rootroot00000000000000__kernel void test_fill_image_3d_2(__write_only image3d_t dst) { int4 coord; int4 color4 = {0x12, 0x34, 0x56, 0x78}; coord.x = (int)get_global_id(0); coord.y = (int)get_global_id(1); coord.z = (int)get_global_id(2); write_imagei(dst, coord, color4); } Release_v0.3/kernels/test_get_image_info.cl000066400000000000000000000007101223142177000211570ustar00rootroot00000000000000__kernel void test_get_image_info(__write_only image3d_t src, __global int *size, __global int *fmt) { int id = (int)get_global_id(0); int w, h, depth; w = get_image_width(src); h = get_image_height(src); depth = get_image_depth(src); int channel_data_type = get_image_channel_data_type(src); int channel_order = get_image_channel_order(src); size[id] = (w << 20 | h << 8 | depth); fmt[id] = (channel_data_type << 16 | channel_order); } 
Release_v0.3/kernels/test_movforphi_undef.cl000066400000000000000000000006511223142177000214210ustar00rootroot00000000000000__kernel void test_movforphi_undef(__read_only image2d_t src, __write_only image2d_t dst, sampler_t sampler) { int2 coord, dstCoord; int4 color; int x = get_global_id(0); int y = get_global_id(1); dstCoord.x = x; dstCoord.y = y; coord.y = y; for(int j = -8; j < 2; j++) { coord.x = j + x; color = read_imagei(src, sampler, coord); if (j == 1 + x) write_imagei(dst, dstCoord, color); } } Release_v0.3/kernels/test_write_only.cl000066400000000000000000000001471223142177000204220ustar00rootroot00000000000000__kernel void test_write_only(__global int *dst) { int id = (int)get_global_id(0); dst[id] = id; } Release_v0.3/setup_fulsim_hsw.sh000066400000000000000000000001611223142177000171350ustar00rootroot00000000000000export INTEL_DEVID_OVERRIDE=0x0094 export DEVICE=hsw_m0 export OCL_FULSIM_RUN=1 export OCL_FULSIM_DEBUG_MODE=$1 Release_v0.3/setup_fulsim_ivb.sh000066400000000000000000000003521223142177000171160ustar00rootroot00000000000000export INTEL_DEVID_OVERRIDE=0x0166 # or, 0x0112 export DEVICE=ivb_m_gt2 # snb_gt2 for SNB GT2 desktop export OCL_SIMULATOR=1 # 0 -> HW, 1 -> fulsim, 2 -> perfsim export OCL_FULSIM_DEBUG_MODE=$1 Release_v0.3/setup_perfsim_ivb.sh000066400000000000000000000003121223142177000172600ustar00rootroot00000000000000export INTEL_DEVID_OVERRIDE=0x0166 # or, 0x0112 export DEVICE=ivb_m_gt2 # snb_gt2 for SNB GT2 desktop export OCL_SIMULATOR=2 # 0 -> HW, 1 -> fulsim, 2 -> perfsim Release_v0.3/src/000077500000000000000000000000001223142177000137725ustar00rootroot00000000000000Release_v0.3/src/.gitignore000066400000000000000000000000251223142177000157570ustar00rootroot00000000000000OCLConfig.h libcl.so Release_v0.3/src/CMakeLists.txt000066400000000000000000000052271223142177000165400ustar00rootroot00000000000000include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${DRM_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/../backend/src/backend/ 
${CMAKE_CURRENT_SOURCE_DIR}/../include ${MESA_SOURCE_INCLUDES}) macro (MakeKernelBinStr KERNEL_PATH KERNEL_FILES) foreach (KF ${KERNEL_FILES}) set (input_file ${KERNEL_PATH}/${KF}.cl) set (output_file ${KERNEL_PATH}/${KF}_str.c) list (APPEND KERNEL_STR_FILES ${output_file}) add_custom_command( OUTPUT ${output_file} COMMAND rm -rf ${output_file} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater -s ${input_file} -o${output_file} DEPENDS ${input_file} ${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater) endforeach (KF) endmacro (MakeKernelBinStr) set (KERNEL_STR_FILES) set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 cl_internal_copy_buf_align16) MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") set(OPENCL_SRC ${KERNEL_STR_FILES} cl_api.c cl_alloc.c cl_kernel.c cl_program.c cl_sampler.c cl_event.c cl_enqueue.c cl_image.c cl_mem.c cl_platform_id.c cl_extensions.c cl_device_id.c cl_context.c cl_command_queue.c cl_command_queue.h cl_command_queue_gen7.c cl_driver.h cl_driver.cpp cl_driver_defs.c intel/intel_gpgpu.c intel/intel_batchbuffer.c intel/intel_driver.c x11/dricommon.c x11/va_dri2.c) if (EGL_FOUND AND MESA_SOURCE_FOUND) set (OPENCL_SRC ${OPENCL_SRC} cl_mem_gl.c cl_gl_api.c x11/mesa_egl_extension.c x11/mesa_egl_res_share.c intel/intel_dri_resource_sharing.c) SET(CMAKE_CXX_FLAGS "-DHAS_EGL ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_EGL ${CMAKE_C_FLAGS}") SET(OPTIONAL_EGL_LIBRARY "${EGL_LIBRARY}") else(EGL_FOUND AND MESA_SOURCE_FOUND) SET(OPTIONAL_EGL_LIBRARY "") endif (EGL_FOUND AND MESA_SOURCE_FOUND) if (OCLIcd_FOUND) set (OPENCL_SRC ${OPENCL_SRC} cl_khr_icd.c) SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}") endif (OCLIcd_FOUND) SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-Bsymbolic,--allow-shlib-undefined") link_directories (${LLVM_LIBRARY_DIR}) add_library(cl SHARED ${OPENCL_SRC}) target_link_libraries( 
cl gbe ${XLIB_LIBRARY} ${XEXT_LIBRARY} ${XFIXES_LIBRARY} ${DRM_INTEL_LIBRARY} ${DRM_LIBRARY} ${OPENGL_LIBRARIES} ${OPTIONAL_EGL_LIBRARY}) install (TARGETS cl LIBRARY DESTINATION lib) Release_v0.3/src/OCLConfig.h.in000066400000000000000000000004351223142177000163150ustar00rootroot00000000000000// the configured options and settings for LIBCL #define LIBCL_DRIVER_VERSION_MAJOR @LIBCL_DRIVER_VERSION_MAJOR@ #define LIBCL_DRIVER_VERSION_MINOR @LIBCL_DRIVER_VERSION_MINOR@ #define LIBCL_C_VERSION_MAJOR @LIBCL_C_VERSION_MAJOR@ #define LIBCL_C_VERSION_MINOR @LIBCL_C_VERSION_MINOR@ Release_v0.3/src/cl_alloc.c000066400000000000000000000032701223142177000157100ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_alloc.h" #include "cl_utils.h" #include #include #include static volatile int32_t cl_alloc_n = 0; LOCAL void* cl_malloc(size_t sz) { void * p = NULL; atomic_inc(&cl_alloc_n); p = malloc(sz); assert(p); return p; } LOCAL void* cl_aligned_malloc(size_t sz, size_t align) { void * p = NULL; atomic_inc(&cl_alloc_n); p = memalign(align, sz); assert(p); return p; } LOCAL void* cl_calloc(size_t n, size_t elem_size) { void *p = NULL; atomic_inc(&cl_alloc_n); p = calloc(n, elem_size); assert(p); return p; } LOCAL void* cl_realloc(void *ptr, size_t sz) { if (ptr == NULL) atomic_inc(&cl_alloc_n); return realloc(ptr, sz); } LOCAL void cl_free(void *ptr) { if (ptr == NULL) return; atomic_dec(&cl_alloc_n); free(ptr); } LOCAL size_t cl_report_unfreed(void) { return cl_alloc_n; } LOCAL void cl_report_set_all_freed(void) { cl_alloc_n = 0; } Release_v0.3/src/cl_alloc.h000066400000000000000000000026531223142177000157210ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_ALLOC_H__ #define __CL_ALLOC_H__ #include "cl_internals.h" #include /* Return a valid pointer for the requested memory block size */ extern void *cl_malloc(size_t sz); /* Aligned malloc */ extern void* cl_aligned_malloc(size_t sz, size_t align); /* malloc + memzero */ extern void *cl_calloc(size_t n, size_t elem_size); /* Regular realloc */ extern void *cl_realloc(void *ptr, size_t sz); /* Free a pointer allocated with cl_*alloc */ extern void cl_free(void *ptr); /* We count the number of allocation. This function report the number of * allocation still unfreed */ extern size_t cl_report_unfreed(void); #endif /* __CL_ALLOC_H__ */ Release_v0.3/src/cl_api.c000066400000000000000000002326431223142177000153770ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_command_queue.h" #include "cl_enqueue.h" #include "cl_event.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_mem.h" #include "cl_image.h" #include "cl_sampler.h" #include "cl_alloc.h" #include "cl_utils.h" #include "CL/cl.h" #include "CL/cl_ext.h" #include "CL/cl_intel.h" #include #include #include #include #ifndef CL_VERSION_1_2 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) #define CL_DEVICE_TYPE_CUSTOM (1 << 4) #define CL_MEM_HOST_WRITE_ONLY (1 << 7) #define CL_MEM_HOST_READ_ONLY (1 << 8) #define CL_MEM_HOST_NO_ACCESS (1 << 9) typedef intptr_t cl_device_partition_property; #endif #define FILL_GETINFO_RET(TYPE, ELT, VAL, RET) \ do { \ if (param_value && param_value_size < sizeof(TYPE)*ELT) \ return CL_INVALID_VALUE; \ if (param_value) { \ memcpy(param_value, (VAL), sizeof(TYPE)*ELT); \ } \ \ if (param_value_size_ret) \ *param_value_size_ret = sizeof(TYPE)*ELT; \ return RET; \ } while(0) inline cl_int handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list, cl_event* event, enqueue_data* data, cl_command_type type) { cl_int status = cl_event_wait_events(num, wait_list, queue); cl_event e; if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) { e = cl_event_new(queue->ctx, queue, type, event!=NULL); if(event != NULL) *event = e; if(status == CL_ENQUEUE_EXECUTE_DEFER) { cl_event_new_enqueue_callback(e, data, num, wait_list); } } return status; } /* The following code checking overlap is from Appendix of openCL spec 1.1 */ inline cl_bool check_copy_overlap(const size_t src_offset[3], const size_t dst_offset[3], const size_t region[3], size_t row_pitch, size_t slice_pitch) { const size_t src_min[] = {src_offset[0], src_offset[1], src_offset[2]}; const size_t src_max[] = {src_offset[0] + region[0], src_offset[1] + region[1], src_offset[2] + region[2]}; const size_t dst_min[] = {dst_offset[0], dst_offset[1], 
dst_offset[2]}; const size_t dst_max[] = {dst_offset[0] + region[0], dst_offset[1] + region[1], dst_offset[2] + region[2]}; // Check for overlap cl_bool overlap = CL_TRUE; unsigned i; size_t dst_start = dst_offset[2] * slice_pitch + dst_offset[1] * row_pitch + dst_offset[0]; size_t dst_end = dst_start + (region[2] * slice_pitch + region[1] * row_pitch + region[0]); size_t src_start = src_offset[2] * slice_pitch + src_offset[1] * row_pitch + src_offset[0]; size_t src_end = src_start + (region[2] * slice_pitch + region[1] * row_pitch + region[0]); for (i=0; i != 3; ++i) { overlap = overlap && (src_min[i] < dst_max[i]) && (src_max[i] > dst_min[i]); } if (!overlap) { size_t delta_src_x = (src_offset[0] + region[0] > row_pitch) ? src_offset[0] + region[0] - row_pitch : 0; size_t delta_dst_x = (dst_offset[0] + region[0] > row_pitch) ? dst_offset[0] + region[0] - row_pitch : 0; if ( (delta_src_x > 0 && delta_src_x > dst_offset[0]) || (delta_dst_x > 0 && delta_dst_x > src_offset[0]) ) { if ( (src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end) ) overlap = CL_TRUE; } if (region[2] > 1) { size_t src_height = slice_pitch / row_pitch; size_t dst_height = slice_pitch / row_pitch; size_t delta_src_y = (src_offset[1] + region[1] > src_height) ? src_offset[1] + region[1] - src_height : 0; size_t delta_dst_y = (dst_offset[1] + region[1] > dst_height) ? 
dst_offset[1] + region[1] - dst_height : 0; if ( (delta_src_y > 0 && delta_src_y > dst_offset[1]) || (delta_dst_y > 0 && delta_dst_y > src_offset[1]) ) { if ( (src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end) ) overlap = CL_TRUE; } } } return overlap; } static cl_int cl_check_device_type(cl_device_type device_type) { const cl_device_type valid = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT | CL_DEVICE_TYPE_CUSTOM; if( (device_type & valid) == 0) { return CL_INVALID_DEVICE_TYPE; } if(UNLIKELY(!(device_type & CL_DEVICE_TYPE_DEFAULT) && !(device_type & CL_DEVICE_TYPE_GPU))) return CL_DEVICE_NOT_FOUND; return CL_SUCCESS; } static cl_int cl_device_id_is_ok(const cl_device_id device) { return device != cl_get_gt_device() ? CL_FALSE : CL_TRUE; } cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { if(UNLIKELY(platforms == NULL && num_platforms == NULL)) return CL_INVALID_VALUE; if(UNLIKELY(num_entries == 0 && platforms != NULL)) return CL_INVALID_VALUE; return cl_get_platform_ids(num_entries, platforms, num_platforms); } cl_int clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { /* Only one platform. 
This is easy */ if (UNLIKELY(platform != NULL && platform != intel_platform)) return CL_INVALID_PLATFORM; return cl_get_platform_info(platform, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) { cl_int err = CL_SUCCESS; /* Check parameter consistency */ if (UNLIKELY(devices == NULL && num_devices == NULL)) return CL_INVALID_VALUE; if (UNLIKELY(platform && platform != intel_platform)) return CL_INVALID_PLATFORM; if (UNLIKELY(devices && num_entries == 0)) return CL_INVALID_VALUE; err = cl_check_device_type(device_type); if(err != CL_SUCCESS) return err; return cl_get_device_ids(platform, device_type, num_entries, devices, num_devices); } cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_device_info(device, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clCreateSubDevices(cl_device_id in_device, const cl_device_partition_property * properties, cl_uint num_devices, cl_device_id * out_devices, cl_uint * num_devices_ret) { NOT_IMPLEMENTED; return 0; } cl_int clRetainDevice(cl_device_id device) { // XXX stub for C++ Bindings return CL_SUCCESS; } cl_int clReleaseDevice(cl_device_id device) { // XXX stub for C++ Bindings return CL_SUCCESS; } cl_context clCreateContext(const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * devices, void (* pfn_notify) (const char*, const void*, size_t, void*), void * user_data, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_context context = NULL; /* Assert parameters correctness */ INVALID_VALUE_IF (devices == NULL); INVALID_VALUE_IF (num_devices == 0); INVALID_VALUE_IF (pfn_notify == NULL && user_data != NULL); /* Now check if the user is asking for the right device */ INVALID_DEVICE_IF 
(cl_device_id_is_ok(*devices) == CL_FALSE); context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err); error: if (errcode_ret) *errcode_ret = err; return context; } cl_context clCreateContextFromType(const cl_context_properties * properties, cl_device_type device_type, void (CL_CALLBACK *pfn_notify) (const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret) { cl_context context = NULL; cl_int err = CL_SUCCESS; cl_device_id devices[1]; cl_uint num_devices = 1; INVALID_VALUE_IF (pfn_notify == NULL && user_data != NULL); err = cl_check_device_type(device_type); if(err != CL_SUCCESS) { goto error; } err = cl_get_device_ids(NULL, device_type, 1, &devices[0], &num_devices); if (err != CL_SUCCESS) { goto error; } context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err); error: if (errcode_ret) *errcode_ret = err; return context; } cl_int clRetainContext(cl_context context) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_context_add_ref(context); error: return err; } cl_int clReleaseContext(cl_context context) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_context_delete(context); error: return err; } cl_int clGetContextInfo(cl_context context, cl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (param_name == CL_CONTEXT_DEVICES) { FILL_GETINFO_RET (cl_device_id, 1, &context->device, CL_SUCCESS); } else if (param_name == CL_CONTEXT_NUM_DEVICES) { cl_uint n = 1; FILL_GETINFO_RET (cl_uint, 1, &n, CL_SUCCESS); } else if (param_name == CL_CONTEXT_REFERENCE_COUNT) { cl_uint ref = context->ref_n; FILL_GETINFO_RET (cl_uint, 1, &ref, CL_SUCCESS); } else if (param_name == CL_CONTEXT_PROPERTIES) { if(context->prop_len > 0) { FILL_GETINFO_RET (cl_context_properties, context->prop_len, context->prop_user, CL_SUCCESS); } else { cl_context_properties n = 0; FILL_GETINFO_RET 
(cl_context_properties, 1, &n, CL_SUCCESS); } } else { return CL_INVALID_VALUE; } error: return err; } cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret) { cl_command_queue queue = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); INVALID_DEVICE_IF (device != context->device); INVALID_VALUE_IF (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)); if(properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {/*not supported now.*/ err = CL_INVALID_QUEUE_PROPERTIES; goto error; } queue = cl_context_create_queue(context, device, properties, &err); error: if (errcode_ret) *errcode_ret = err; return queue; } cl_int clRetainCommandQueue(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); cl_command_queue_add_ref(command_queue); error: return err; } cl_int clReleaseCommandQueue(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); cl_command_queue_delete(command_queue); error: return err; } cl_int clGetCommandQueueInfo(cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); if (param_name == CL_QUEUE_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &command_queue->ctx, CL_SUCCESS); } else if (param_name == CL_QUEUE_DEVICE) { FILL_GETINFO_RET (cl_device_id, 1, &command_queue->ctx->device, CL_SUCCESS); } else if (param_name == CL_QUEUE_REFERENCE_COUNT) { cl_uint ref = command_queue->ref_n; FILL_GETINFO_RET (cl_uint, 1, &ref, CL_SUCCESS); } else if (param_name == CL_QUEUE_PROPERTIES) { FILL_GETINFO_RET (cl_command_queue_properties, 1, &command_queue->props, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) { cl_mem mem 
= NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); mem = cl_mem_new_buffer(context, flags, size, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateSubBuffer(cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type buffer_create_type, const void * buffer_create_info, cl_int * errcode_ret) { #if 0 cl_int err = CL_SUCCESS; CHECK_MEM (buffer); NOT_IMPLEMENTED; error: #endif return NULL; } cl_mem clCreateImage(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); mem = cl_mem_new_image(context, flags, image_format, image_desc, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateImage2D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_row_pitch, void * host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_image_desc image_desc; memset(&image_desc, 0, sizeof(image_desc)); image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image_desc.image_width = image_width; image_desc.image_height = image_height; image_desc.image_row_pitch = image_row_pitch; mem = cl_mem_new_image(context, flags, image_format, &image_desc, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void * host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_image_desc image_desc; image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; image_desc.image_width = image_width; image_desc.image_height = image_height; image_desc.image_depth = image_depth; 
image_desc.image_row_pitch = image_row_pitch; image_desc.image_slice_pitch = image_slice_pitch; mem = cl_mem_new_image(context, flags, image_format, &image_desc, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_int clRetainMemObject(cl_mem memobj) { cl_int err = CL_SUCCESS; CHECK_MEM (memobj); cl_mem_add_ref(memobj); error: return err; } cl_int clReleaseMemObject(cl_mem memobj) { cl_int err = CL_SUCCESS; CHECK_MEM (memobj); cl_mem_delete(memobj); error: return err; } cl_int clGetSupportedImageFormats(cl_context ctx, cl_mem_flags flags, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format * image_formats, cl_uint * num_image_formats) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (ctx); if (UNLIKELY(num_entries == 0 && image_formats != NULL)) { err = CL_INVALID_VALUE; goto error; } if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D && image_type != CL_MEM_OBJECT_IMAGE3D)) { err = CL_INVALID_VALUE; goto error; } err = cl_image_get_supported_fmt(ctx, image_type, num_entries, image_formats, num_image_formats); error: return err; } cl_int clGetMemObjectInfo(cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_MEM(memobj); err = cl_get_mem_object_info(memobj, param_name, param_value_size, param_value, param_value_size_ret); error: return err; } cl_int clGetImageInfo(cl_mem mem, cl_image_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_image_info(mem, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clSetMemObjectDestructorCallback(cl_mem memobj, void (CL_CALLBACK *pfn_notify) (cl_mem, void*), void * user_data) { cl_int err = CL_SUCCESS; CHECK_MEM(memobj); INVALID_VALUE_IF (pfn_notify == 0); cl_mem_dstr_cb *cb = (cl_mem_dstr_cb*)malloc(sizeof(cl_mem_dstr_cb)); if (!cb) { err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(cb, 0, sizeof(cl_mem_dstr_cb)); 
cb->pfn_notify = pfn_notify; cb->user_data = user_data; cb->next = memobj->dstr_cb; memobj->dstr_cb = cb; error: return err; } cl_sampler clCreateSampler(cl_context context, cl_bool normalized, cl_addressing_mode addressing, cl_filter_mode filter, cl_int * errcode_ret) { cl_sampler sampler = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); sampler = cl_sampler_new(context, normalized, addressing, filter, &err); error: if (errcode_ret) *errcode_ret = err; return sampler; } cl_int clRetainSampler(cl_sampler sampler) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); cl_sampler_add_ref(sampler); error: return err; } cl_int clReleaseSampler(cl_sampler sampler) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); cl_sampler_delete(sampler); error: return err; } cl_int clGetSamplerInfo(cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { NOT_IMPLEMENTED; return 0; } cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const char ** strings, const size_t * lengths, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; cl_uint i; CHECK_CONTEXT (context); INVALID_VALUE_IF (count == 0); INVALID_VALUE_IF (strings == NULL); for(i = 0; i < count; i++) { if(UNLIKELY(strings[i] == NULL)) { err = CL_INVALID_VALUE; goto error; } } program = cl_program_create_from_source(context, count, strings, lengths, &err); error: if (errcode_ret) *errcode_ret = err; return program; } cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id * devices, const size_t * lengths, const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); program = cl_program_create_from_binary(context, num_devices, devices, lengths, binaries, binary_status, &err); error: if (errcode_ret) *errcode_ret = err; return program; } cl_int clRetainProgram(cl_program 
program) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); cl_program_add_ref(program); error: return err; } cl_int clReleaseProgram(cl_program program) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); cl_program_delete(program); error: return err; } cl_int clBuildProgram(cl_program program, cl_uint num_devices, const cl_device_id * device_list, const char * options, void (CL_CALLBACK *pfn_notify) (cl_program, void*), void * user_data) { cl_int err = CL_SUCCESS; CHECK_PROGRAM(program); INVALID_VALUE_IF (num_devices > 1); INVALID_VALUE_IF (num_devices == 0 && device_list != NULL); INVALID_VALUE_IF (num_devices != 0 && device_list == NULL); INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL); /* Everything is easy. We only support one device anyway */ if (num_devices != 0) { assert(program->ctx); INVALID_DEVICE_IF (device_list[0] != program->ctx->device); } /* TODO support create program from binary */ assert(program->source_type == FROM_LLVM || program->source_type == FROM_SOURCE || program->source_type == FROM_BINARY); if((err = cl_program_build(program, options)) != CL_SUCCESS) { goto error; } program->is_built = CL_TRUE; if (pfn_notify) pfn_notify(program, user_data); error: return err; } cl_int clUnloadCompiler(void) { return CL_SUCCESS; } cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; char * ret_str = ""; CHECK_PROGRAM (program); if (param_name == CL_PROGRAM_REFERENCE_COUNT) { cl_uint ref = program->ref_n; FILL_GETINFO_RET (cl_uint, 1, (&ref), CL_SUCCESS); } else if (param_name == CL_PROGRAM_CONTEXT) { cl_context context = program->ctx; FILL_GETINFO_RET (cl_context, 1, &context, CL_SUCCESS); } else if (param_name == CL_PROGRAM_NUM_DEVICES) { cl_uint num_dev = 1; // Just 1 dev now. 
FILL_GETINFO_RET (cl_uint, 1, &num_dev, CL_SUCCESS); } else if (param_name == CL_PROGRAM_DEVICES) { cl_device_id dev_id = program->ctx->device; FILL_GETINFO_RET (cl_device_id, 1, &dev_id, CL_SUCCESS); } else if (param_name == CL_PROGRAM_SOURCE) { if (!program->source) FILL_GETINFO_RET (char, 1, &ret_str, CL_SUCCESS); FILL_GETINFO_RET (char, (strlen(program->source) + 1), program->source, CL_SUCCESS); } else if (param_name == CL_PROGRAM_BINARY_SIZES) { FILL_GETINFO_RET (size_t, 1, (&program->bin_sz), CL_SUCCESS); } else if (param_name == CL_PROGRAM_BINARIES) { if (!param_value) return CL_SUCCESS; /* param_value points to an array of n pointers allocated by the caller */ if (program->bin_sz > 0) { memcpy(*((void **)param_value), program->bin, program->bin_sz); } else { memcpy(*((void **)param_value), ret_str, 1); } return CL_SUCCESS; } else { return CL_INVALID_VALUE; } error: return err; } cl_int clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; char * ret_str = ""; CHECK_PROGRAM (program); INVALID_DEVICE_IF (device != program->ctx->device); if (param_name == CL_PROGRAM_BUILD_STATUS) { cl_build_status status; if (!program->is_built) status = CL_BUILD_NONE; else if (program->ker_n > 0) status = CL_BUILD_SUCCESS; else status = CL_BUILD_ERROR; // TODO: Support CL_BUILD_IN_PROGRESS ? FILL_GETINFO_RET (cl_build_status, 1, &status, CL_SUCCESS); } else if (param_name == CL_PROGRAM_BUILD_OPTIONS) { if (program->is_built && program->build_opts) ret_str = program->build_opts; FILL_GETINFO_RET (char, (strlen(ret_str)+1), ret_str, CL_SUCCESS); } else if (param_name == CL_PROGRAM_BUILD_LOG) { // TODO: need to add logs in backend when compiling. 
FILL_GETINFO_RET (char, (strlen(ret_str)+1), ret_str, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_kernel clCreateKernel(cl_program program, const char * kernel_name, cl_int * errcode_ret) { cl_kernel kernel = NULL; cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); if (program->is_built == CL_FALSE) { err = CL_INVALID_PROGRAM_EXECUTABLE; goto error; } INVALID_VALUE_IF (kernel_name == NULL); kernel = cl_program_create_kernel(program, kernel_name, &err); error: if (errcode_ret) *errcode_ret = err; return kernel; } cl_int clCreateKernelsInProgram(cl_program program, cl_uint num_kernels, cl_kernel * kernels, cl_uint * num_kernels_ret) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); if (program->is_built == CL_FALSE) { err = CL_INVALID_PROGRAM_EXECUTABLE; goto error; } if (kernels && num_kernels < program->ker_n) { err = CL_INVALID_VALUE; goto error; } if(num_kernels_ret) *num_kernels_ret = program->ker_n; if(kernels) err = cl_program_create_kernels_in_program(program, kernels); error: return err; } cl_int clRetainKernel(cl_kernel kernel) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); cl_kernel_add_ref(kernel); error: return err; } cl_int clReleaseKernel(cl_kernel kernel) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); cl_kernel_delete(kernel); error: return err; } cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void * arg_value) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value); error: return err; } cl_int clGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err; CHECK_KERNEL(kernel); if (param_name == CL_KERNEL_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &kernel->program->ctx, CL_SUCCESS); } else if (param_name == CL_KERNEL_PROGRAM) { FILL_GETINFO_RET (cl_program, 1, &kernel->program, CL_SUCCESS); } else if (param_name == CL_KERNEL_NUM_ARGS) { 
cl_uint n = kernel->arg_n; FILL_GETINFO_RET (cl_uint, 1, &n, CL_SUCCESS); } else if (param_name == CL_KERNEL_REFERENCE_COUNT) { cl_int ref = kernel->ref_n; FILL_GETINFO_RET (cl_int, 1, &ref, CL_SUCCESS); } else if (param_name == CL_KERNEL_FUNCTION_NAME) { const char * n = cl_kernel_get_name(kernel); FILL_GETINFO_RET (cl_char, strlen(n)+1, n, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_kernel_workgroup_info(device, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clWaitForEvents(cl_uint num_events, const cl_event * event_list) { cl_int err = CL_SUCCESS; cl_context ctx = NULL; if(num_events > 0 && event_list) ctx = event_list[0]->ctx; TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx); while(cl_event_wait_events(num_events, event_list, NULL) == CL_ENQUEUE_EXECUTE_DEFER) { usleep(8000); //sleep 8ms to wait other thread } error: return err; } cl_int clGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if (param_name == CL_EVENT_COMMAND_QUEUE) { FILL_GETINFO_RET (cl_command_queue, 1, &event->queue, CL_SUCCESS); } else if (param_name == CL_EVENT_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &event->ctx, CL_SUCCESS); } else if (param_name == CL_EVENT_COMMAND_TYPE) { FILL_GETINFO_RET (cl_command_type, 1, &event->type, CL_SUCCESS); } else if (param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) { cl_event_update_status(event); FILL_GETINFO_RET (cl_int, 1, &event->status, CL_SUCCESS); } else if (param_name == CL_EVENT_REFERENCE_COUNT) { cl_uint ref = event->ref_n; FILL_GETINFO_RET (cl_int, 1, &ref, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_event 
clCreateUserEvent(cl_context context, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_event event = NULL; CHECK_CONTEXT(context); TRY_ALLOC(event, cl_event_new(context, NULL, CL_COMMAND_USER, CL_TRUE)); error: if(errcode_ret) *errcode_ret = err; return event; } cl_int clRetainEvent(cl_event event) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); cl_event_add_ref(event); error: return err; } cl_int clReleaseEvent(cl_event event) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); cl_event_delete(event); error: return err; } cl_int clSetUserEventStatus(cl_event event, cl_int execution_status) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if(execution_status > CL_COMPLETE) { err = CL_INVALID_VALUE; goto error; } if(event->status != CL_SUBMITTED) { err = CL_INVALID_OPERATION; goto error; } cl_event_set_status(event, execution_status); error: return err; } cl_int clSetEventCallback(cl_event event, cl_int command_exec_callback_type, void (CL_CALLBACK * pfn_notify) (cl_event, cl_int, void *), void * user_data) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if((pfn_notify == NULL) || (command_exec_callback_type > CL_SUBMITTED) || (command_exec_callback_type < CL_COMPLETE)) { err = CL_INVALID_VALUE; goto error; } err = cl_event_set_callback(event, command_exec_callback_type, pfn_notify, user_data); error: return err; } cl_int clGetEventProfilingInfo(cl_event event, cl_profiling_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; cl_ulong ret_val; CHECK_EVENT(event); if (!(event->queue->props & CL_QUEUE_PROFILING_ENABLE) || event->type == CL_COMMAND_USER || event->status != CL_COMPLETE) { err = CL_PROFILING_INFO_NOT_AVAILABLE; goto error; } if ((param_name != CL_PROFILING_COMMAND_QUEUED && param_name != CL_PROFILING_COMMAND_SUBMIT && param_name != CL_PROFILING_COMMAND_START && param_name != CL_PROFILING_COMMAND_END) || (param_value && param_value_size < sizeof(cl_ulong))) { err = CL_INVALID_VALUE; goto error; 
} err = cl_event_profiling(event, param_name, &ret_val); if (err == CL_SUCCESS) { if (param_value) *(cl_ulong*)param_value = ret_val; if (param_value_size_ret) *param_value_size_ret = sizeof(cl_ulong); } error: return err; } cl_int clFlush(cl_command_queue command_queue) { /* have nothing to do now, as currently * clEnqueueNDRangeKernel will flush at * the end of each calling. we may need * to optimize it latter.*/ return 0; } cl_int clFinish(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); err = cl_command_queue_finish(command_queue); error: return err; } cl_int clEnqueueReadBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, defer_enqueue_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!ptr || !size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &defer_enqueue_data; data->type = EnqueueReadBuffer; data->mem_obj = buffer; data->ptr = ptr; data->offset = offset; data->size = size; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueReadBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void 
* ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(buffer_row_pitch == 0) buffer_row_pitch = region[0]; if(buffer_slice_pitch == 0) buffer_slice_pitch = region[1] * buffer_row_pitch; if(host_row_pitch == 0) host_row_pitch = region[0]; if(host_slice_pitch == 0) host_slice_pitch = region[1] * host_row_pitch; if (buffer_row_pitch < region[0] || host_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) || (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((buffer_origin[2]+region[2])*buffer_slice_pitch + (buffer_origin[1]+region[1])*buffer_row_pitch + buffer_origin[0] + region[0] > buffer->size) { err = CL_INVALID_VALUE; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueReadBufferRect; data->mem_obj = buffer; data->ptr = ptr; data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2]; data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = buffer_row_pitch; data->slice_pitch = buffer_slice_pitch; data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { 
err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!ptr || !size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if (buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueWriteBuffer; data->mem_obj = buffer; data->const_ptr = ptr; data->offset = offset; data->size = size; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(buffer_row_pitch == 0) buffer_row_pitch = region[0]; if(buffer_slice_pitch == 
0) buffer_slice_pitch = region[1] * buffer_row_pitch; if(host_row_pitch == 0) host_row_pitch = region[0]; if(host_slice_pitch == 0) host_slice_pitch = region[1] * host_row_pitch; if (buffer_row_pitch < region[0] || host_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) || (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((buffer_origin[2]+region[2])*buffer_slice_pitch + (buffer_origin[1]+region[1])*buffer_row_pitch + buffer_origin[0] + region[0] > buffer->size) { err = CL_INVALID_VALUE; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueWriteBufferRect; data->mem_obj = buffer; data->const_ptr = ptr; data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2]; data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = buffer_row_pitch; data->slice_pitch = buffer_slice_pitch; data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueCopyBuffer(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(src_buffer); 
CHECK_MEM(dst_buffer); if (command_queue->ctx != src_buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (command_queue->ctx != dst_buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_offset < 0 || src_offset + cb > src_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (dst_offset < 0 || dst_offset + cb > src_buffer->size) { err = CL_INVALID_VALUE; goto error; } /* Check overlap */ if (src_buffer == dst_buffer && (src_offset <= dst_offset && dst_offset <= src_offset + cb - 1) && (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) { err = CL_MEM_COPY_OVERLAP; goto error; } // TODO: Need to check the sub buffer cases. err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx); data = &no_wait_data; data->type = EnqueueCopyBuffer; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_command_queue_flush(command_queue); } return 0; error: return err; } cl_int clEnqueueCopyBufferRect(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t * src_origin, const size_t * dst_origin, const size_t * region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(src_buffer); CHECK_MEM(dst_buffer); if ((command_queue->ctx != src_buffer->ctx) || (command_queue->ctx != dst_buffer->ctx)) { err = CL_INVALID_CONTEXT; goto error; } if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(src_row_pitch == 0) src_row_pitch = region[0]; if(src_slice_pitch == 0) src_slice_pitch = region[1] * src_row_pitch; 
if(dst_row_pitch == 0) dst_row_pitch = region[0]; if(dst_slice_pitch == 0) dst_slice_pitch = region[1] * dst_row_pitch; if (src_row_pitch < region[0] || dst_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((src_slice_pitch < region[1] * src_row_pitch || src_slice_pitch % src_row_pitch != 0 ) || (dst_slice_pitch < region[1] * dst_row_pitch || dst_slice_pitch % dst_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((src_origin[2]+region[2])*src_slice_pitch + (src_origin[1]+region[1])*src_row_pitch + src_origin[0] + region[0] > src_buffer->size || (dst_origin[2]+region[2])*dst_slice_pitch + (dst_origin[1]+region[1])*dst_row_pitch + dst_origin[0] + region[0] > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (src_buffer == dst_buffer && (src_row_pitch != dst_row_pitch || src_slice_pitch != dst_slice_pitch)) { err = CL_INVALID_VALUE; goto error; } if (src_buffer == dst_buffer && check_copy_overlap(src_origin, dst_origin, region, src_row_pitch, src_slice_pitch)) { err = CL_MEM_COPY_OVERLAP; goto error; } cl_mem_copy_buffer_rect(command_queue, src_buffer, dst_buffer, src_origin, dst_origin, region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx); data = &no_wait_data; data->type = EnqueueCopyBufferRect; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_command_queue_flush(command_queue); } error: return err; } cl_int clEnqueueReadImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_read, const size_t * origin, const size_t * region, size_t row_pitch, size_t slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, 
image); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!row_pitch) row_pitch = image->bpp*region[0]; else if (row_pitch < image->bpp*region[0]) { err = CL_INVALID_VALUE; goto error; } if (image->slice_pitch) { if (!slice_pitch) slice_pitch = row_pitch*region[1]; else if (slice_pitch < row_pitch*region[1]) { err = CL_INVALID_VALUE; goto error; } } else if (slice_pitch) { err = CL_INVALID_VALUE; goto error; } if (!ptr) { err = CL_INVALID_VALUE; goto error; } if (mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueReadImage; data->mem_obj = mem; data->ptr = ptr; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_write, const size_t * origin, const size_t * region, size_t row_pitch, size_t slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + 
region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!row_pitch) row_pitch = image->bpp*region[0]; else if (row_pitch < image->bpp*region[0]) { err = CL_INVALID_VALUE; goto error; } if (image->slice_pitch) { if (!slice_pitch) slice_pitch = row_pitch*region[1]; else if (slice_pitch < row_pitch*region[1]) { err = CL_INVALID_VALUE; goto error; } } else if (slice_pitch) { err = CL_INVALID_VALUE; goto error; } if (!ptr) { err = CL_INVALID_VALUE; goto error; } if (mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueWriteImage; data->mem_obj = mem; data->const_ptr = ptr; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueCopyImage(cl_command_queue command_queue, cl_mem src_mem, cl_mem dst_mem, const size_t * src_origin, const size_t * dst_origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; cl_bool overlap = CL_TRUE; cl_int i = 0; CHECK_QUEUE(command_queue); CHECK_IMAGE(src_mem, src_image); CHECK_IMAGE(dst_mem, dst_image); if (command_queue->ctx != src_mem->ctx || command_queue->ctx != dst_mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_image->fmt.image_channel_order != dst_image->fmt.image_channel_order || src_image->fmt.image_channel_data_type != 
dst_image->fmt.image_channel_data_type) { err = CL_IMAGE_FORMAT_MISMATCH; goto error; } if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) { err = CL_INVALID_VALUE; goto error; } if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { err = CL_INVALID_VALUE; goto error; } if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) || (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1))) { err = CL_INVALID_VALUE; goto error; } if (src_image == dst_image) { for(i = 0; i < 3; i++) overlap = overlap && (src_origin[i] < dst_origin[i] + region[i]) && (dst_origin[i] < src_origin[i] + region[i]); if(overlap == CL_TRUE) { err = CL_MEM_COPY_OVERLAP; goto error; } } cl_mem_kernel_copy_image(command_queue, src_image, dst_image, src_origin, dst_origin, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyImage; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_command_queue_flush(command_queue); } error: return err; } cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_mem, cl_mem dst_buffer, const size_t * src_origin, const size_t * region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(src_mem, src_image); CHECK_MEM(dst_buffer); if (command_queue->ctx != src_mem->ctx || command_queue->ctx != dst_buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (dst_offset + 
region[0]*region[1]*region[2]*src_image->bpp > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) { err = CL_INVALID_VALUE; goto error; } if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) { err = CL_INVALID_VALUE; goto error; } cl_mem_copy_image_to_buffer(command_queue, src_image, dst_buffer, src_origin, dst_offset, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyImageToBuffer; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_command_queue_flush(command_queue); } error: return err; } cl_int clEnqueueCopyBufferToImage(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_mem, size_t src_offset, const size_t * dst_origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(src_buffer); CHECK_IMAGE(dst_mem, dst_image); if (command_queue->ctx != src_buffer->ctx || command_queue->ctx != dst_mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_offset + region[0]*region[1]*region[2]*dst_image->bpp > src_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { err = CL_INVALID_VALUE; goto error; } if (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1)) { err = CL_INVALID_VALUE; goto error; } cl_mem_copy_buffer_to_image(command_queue, src_buffer, dst_image, 
src_offset, dst_origin, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, dst_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyBufferToImage; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER_TO_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_command_queue_flush(command_queue); } error: return err; } static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, size_t size) { cl_int slot = -1; int err = CL_SUCCESS; if (!(*ptr = cl_mem_map_gtt_unsync(mem))) { err = CL_MAP_FAILURE; goto error; } *ptr = (char*)(*ptr) + offset; if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); //only calc ptr here, will do memcpy in enqueue *mem_ptr = mem->host_ptr + offset; } else { *mem_ptr = *ptr; } /* Record the mapped address. */ if (!mem->mapped_ptr_sz) { mem->mapped_ptr_sz = 16; mem->mapped_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz); if (!mem->mapped_ptr) { cl_mem_unmap_gtt(mem); err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); slot = 0; } else { int i = 0; for (; i < mem->mapped_ptr_sz; i++) { if (mem->mapped_ptr[i].ptr == NULL) { slot = i; break; } } if (i == mem->mapped_ptr_sz) { cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2); if (!new_ptr) { cl_mem_unmap_gtt (mem); err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); memcpy(new_ptr, mem->mapped_ptr, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); slot = mem->mapped_ptr_sz; mem->mapped_ptr_sz *= 2; free(mem->mapped_ptr); mem->mapped_ptr = new_ptr; } } assert(slot != -1); mem->mapped_ptr[slot].ptr = *mem_ptr; mem->mapped_ptr[slot].v_ptr = *ptr; mem->mapped_ptr[slot].size = size; mem->map_ref++; error: if (err != CL_SUCCESS) *mem_ptr = NULL; return err; } void * 
clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; void *ptr = NULL; void *mem_ptr = NULL; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if ((map_flags & CL_MAP_READ && buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) || (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) && buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) { err = CL_INVALID_OPERATION; goto error; } err = _cl_map_mem(buffer, &ptr, &mem_ptr, offset, size); if (err != CL_SUCCESS) goto error; TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueMapBuffer; data->mem_obj = buffer; data->offset = offset; data->size = size; data->ptr = ptr; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: if (errcode_ret) *errcode_ret = err; return mem_ptr; } void * clEnqueueMapImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_map, cl_map_flags map_flags, const size_t * origin, const size_t * region, size_t * image_row_pitch, size_t * image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; void *ptr = NULL; void *mem_ptr = NULL; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; 
goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!image_row_pitch || (image->slice_pitch && !image_slice_pitch)) { err = CL_INVALID_VALUE; goto error; } *image_row_pitch = image->row_pitch; if (image_slice_pitch) *image_slice_pitch = image->slice_pitch; if ((map_flags & CL_MAP_READ && mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) || (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) && mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) { err = CL_INVALID_OPERATION; goto error; } if (!(ptr = cl_mem_map_gtt_unsync(mem))) { err = CL_MAP_FAILURE; goto error; } size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; size_t size; if(region[2] == 1) { if(region[1] == 1) size = image->bpp * region[0]; else size = image->row_pitch * (region[1] - 1) + (image->bpp * (origin[0] + region[0])); } else { size = image->slice_pitch * (region[2] - 1); size += image->row_pitch * (origin[1] + region[1]); size += image->bpp * (origin[0] + region[0]); } err = _cl_map_mem(mem, &ptr, &mem_ptr, offset, size); if (err != CL_SUCCESS) goto error; TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueMapImage; data->mem_obj = mem; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = *image_row_pitch; if (image_slice_pitch) data->slice_pitch = *image_slice_pitch; data->ptr = ptr; data->offset = offset; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: if (errcode_ret) *errcode_ret = 
err; return mem_ptr; //TODO: map and unmap first } cl_int clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_mem memobj, void * mapped_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(memobj); if (command_queue->ctx != memobj->ctx) { err = CL_INVALID_CONTEXT; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, memobj->ctx); data = &no_wait_data; data->type = EnqueueUnmapMemObject; data->mem_obj = memobj; data->ptr = mapped_ptr; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_UNMAP_MEM_OBJECT) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t * global_work_offset, const size_t * global_work_size, const size_t * local_work_size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { size_t fixed_global_off[] = {0,0,0}; size_t fixed_global_sz[] = {1,1,1}; size_t fixed_local_sz[] = {1,1,1}; cl_int err = CL_SUCCESS; cl_uint i; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_KERNEL(kernel); /* Check number of dimensions we have */ if (UNLIKELY(work_dim == 0 || work_dim > 3)) { err = CL_INVALID_WORK_DIMENSION; goto error; } /* We need a work size per dimension */ if (UNLIKELY(global_work_size == NULL)) { err = CL_INVALID_GLOBAL_WORK_SIZE; goto error; } if (global_work_offset != NULL) for (i = 0; i < work_dim; ++i) { if (UNLIKELY(~0LL - global_work_offset[i] > global_work_size[i])) { err = CL_INVALID_GLOBAL_OFFSET; goto error; } } /* Local sizes must be non-null and divide global sizes */ if (local_work_size != NULL) for (i = 0; i < work_dim; ++i) if (UNLIKELY(local_work_size[i] 
== 0 || global_work_size[i] % local_work_size[i])) { err = CL_INVALID_WORK_GROUP_SIZE; goto error; } /* Queue and kernel must share the same context */ assert(kernel->program); if (command_queue->ctx != kernel->program->ctx) { err = CL_INVALID_CONTEXT; goto error; } /* XXX No event right now */ //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported"); //FATAL_IF(event_wait_list != NULL, "Events are not supported"); //FATAL_IF(event != NULL, "Events are not supported"); if (local_work_size != NULL) for (i = 0; i < work_dim; ++i) fixed_local_sz[i] = local_work_size[i]; if (global_work_size != NULL) for (i = 0; i < work_dim; ++i) fixed_global_sz[i] = global_work_size[i]; if (global_work_offset != NULL) for (i = 0; i < work_dim; ++i) fixed_global_off[i] = global_work_offset[i]; /* Do device specific checks are enqueue the kernel */ err = cl_command_queue_ND_range(command_queue, kernel, work_dim, fixed_global_off, fixed_global_sz, fixed_local_sz); if(err != CL_SUCCESS) goto error; data = &no_wait_data; data->type = EnqueueNDRangeKernel; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_NDRANGE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_command_queue_flush(command_queue); } error: return err; } cl_int clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { const size_t global_size[3] = {1, 0, 0}; const size_t local_size[3] = {1, 0, 0}; return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size, num_events_in_wait_list, event_wait_list, event); } cl_int clEnqueueNativeKernel(cl_command_queue command_queue, void (*user_func)(void *), void * args, size_t cb_args, cl_uint num_mem_objects, const cl_mem * mem_list, const void ** args_mem_loc, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; void *new_args 
= NULL;
  enqueue_data *data, no_wait_data = { 0 };
  cl_int i;

  if(user_func == NULL ||
    (args == NULL && cb_args > 0) ||
    (args == NULL && num_mem_objects ==0) ||
    (args != NULL && cb_args == 0) ||
    (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) ||
    (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) {
    err = CL_INVALID_VALUE;
    goto error;
  }

  //Per spec, need copy args
  if (cb_args)
  {
    new_args = malloc(cb_args);
    if (!new_args)
    {
      err = CL_OUT_OF_HOST_MEMORY;
      goto error;
    }
    memcpy(new_args, args, cb_args);

    /* NOTE(review): the loop header and the statements up to the wait-list
       check are truncated in this copy of the file; restore them from the
       upstream source before building. */
    for (i=0; ictx);

  data = &no_wait_data;
  data->type        = EnqueueNativeKernel;
  data->mem_list    = mem_list;
  data->ptr         = new_args;
  data->size        = cb_args;
  data->offset      = (size_t)num_mem_objects;
  data->const_ptr   = args_mem_loc;
  data->user_func   = user_func;

  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
                   event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
    err = cl_enqueue_handle(data);
    if(event) cl_event_set_status(*event, CL_COMPLETE);
  }

error:
  return err;
}

/*
 * Create a marker event on `command_queue` via cl_event_marker().
 * Returns CL_INVALID_VALUE when `event` is NULL.
 */
cl_int
clEnqueueMarker(cl_command_queue command_queue,
                cl_event *       event)
{
  cl_int err = CL_SUCCESS;
  CHECK_QUEUE(command_queue);
  if(event == NULL) {
    err = CL_INVALID_VALUE;
    goto error;
  }

  cl_event_marker(command_queue, event);
error:
  return err;
}

/*
 * Validate the queue, then forward to clWaitForEvents() and return its
 * result.
 */
cl_int
clEnqueueWaitForEvents(cl_command_queue  command_queue,
                       cl_uint           num_events,
                       const cl_event *  event_list)
{
  cl_int err = CL_SUCCESS;
  CHECK_QUEUE(command_queue);
  err = clWaitForEvents(num_events, event_list);

error:
  return err;
}

/* Validate the queue, then call cl_command_queue_set_barrier() on it. */
cl_int
clEnqueueBarrier(cl_command_queue  command_queue)
{
  cl_int err = CL_SUCCESS;
  CHECK_QUEUE(command_queue);
  cl_command_queue_set_barrier(command_queue);

error:
  return err;
}

/* Return the address of function x when its name equals func_name. */
#define EXTFUNC(x)                      \
  if (strcmp(#x, func_name) == 0)       \
    return (void *)x;

/*
 * Look up an extension entry point by name.
 * Returns NULL for a NULL name or a name that matches none of the functions
 * listed below.
 */
void*
clGetExtensionFunctionAddress(const char *func_name)
{
  if (func_name == NULL)
    return NULL;
#ifdef HAS_OCLIcd
  /* cl_khr_icd */
  EXTFUNC(clIcdGetPlatformIDsKHR)
#endif
  EXTFUNC(clCreateProgramWithLLVMIntel)
  EXTFUNC(clGetGenVersionIntel)
  EXTFUNC(clMapBufferIntel)
  EXTFUNC(clUnmapBufferIntel)
  EXTFUNC(clMapBufferGTTIntel)
  EXTFUNC(clUnmapBufferGTTIntel)
  EXTFUNC(clPinBufferIntel)
  EXTFUNC(clUnpinBufferIntel)
  EXTFUNC(clReportUnfreedIntel)
  EXTFUNC(clCreateBufferFromLibvaIntel)
  EXTFUNC(clCreateImageFromLibvaIntel)
  return NULL;
}

#undef EXTFUNC

/* Thin wrapper: returns the result of cl_report_unfreed(). */
cl_int
clReportUnfreedIntel(void)
{
  return cl_report_unfreed();
}

/*
 * Map `mem` with cl_mem_map() and return the resulting pointer.
 * *errcode_ret, when non-NULL, receives the status set by the handle check;
 * the result of cl_mem_map() itself does not change that status.
 */
void*
clMapBufferIntel(cl_mem mem, cl_int *errcode_ret)
{
  void *ptr = NULL;
  cl_int err = CL_SUCCESS;
  CHECK_MEM (mem);
  ptr = cl_mem_map(mem);
error:
  if (errcode_ret)
    *errcode_ret = err;
  return ptr;
}

/* Validate `mem`, then return the result of cl_mem_unmap(). */
cl_int
clUnmapBufferIntel(cl_mem mem)
{
  cl_int err = CL_SUCCESS;
  CHECK_MEM (mem);
  err = cl_mem_unmap(mem);
error:
  return err;
}

/*
 * Map `mem` with cl_mem_map_gtt() and return the resulting pointer.
 * *errcode_ret, when non-NULL, receives the status set by the handle check.
 */
void*
clMapBufferGTTIntel(cl_mem mem, cl_int *errcode_ret)
{
  void *ptr = NULL;
  cl_int err = CL_SUCCESS;
  CHECK_MEM (mem);
  ptr = cl_mem_map_gtt(mem);
error:
  if (errcode_ret)
    *errcode_ret = err;
  return ptr;
}

/* Validate `mem`, then return the result of cl_mem_unmap_gtt(). */
cl_int
clUnmapBufferGTTIntel(cl_mem mem)
{
  cl_int err = CL_SUCCESS;
  CHECK_MEM (mem);
  err = cl_mem_unmap_gtt(mem);
error:
  return err;
}

/* Validate `mem`, then call cl_mem_pin(); its result is not inspected. */
cl_int
clPinBufferIntel(cl_mem mem)
{
  cl_int err = CL_SUCCESS;
  CHECK_MEM (mem);
  cl_mem_pin(mem);
error:
  return err;
}

/* Validate `mem`, then call cl_mem_unpin(); its result is not inspected. */
cl_int
clUnpinBufferIntel(cl_mem mem)
{
  cl_int err = CL_SUCCESS;
  CHECK_MEM (mem);
  cl_mem_unpin(mem);
error:
  return err;
}

/* Thin wrapper: returns the result of cl_device_get_version(). */
cl_int
clGetGenVersionIntel(cl_device_id device, cl_int *ver)
{
  return cl_device_get_version(device, ver);
}

/* Thin wrapper: returns the result of cl_program_create_from_llvm(). */
cl_program
clCreateProgramWithLLVMIntel(cl_context              context,
                             cl_uint                 num_devices,
                             const cl_device_id *    devices,
                             const char *            filename,
                             cl_int *                errcode_ret)
{
  return cl_program_create_from_llvm(context,
                                     num_devices,
                                     devices,
                                     filename,
                                     errcode_ret);
}

/*
 * Create a buffer object from the libva buffer named `bo_name` through
 * cl_mem_new_libva_buffer(). *errorcode_ret, when non-NULL, receives the
 * status.
 */
cl_mem
clCreateBufferFromLibvaIntel(cl_context  context,
                             unsigned int bo_name,
                             cl_int *errorcode_ret)
{
  cl_mem mem = NULL;
  cl_int err = CL_SUCCESS;
  CHECK_CONTEXT (context);

  mem = cl_mem_new_libva_buffer(context, bo_name, &err);

error:
  if (errorcode_ret)
    *errorcode_ret = err;
  return mem;
}

cl_mem
clCreateImageFromLibvaIntel(cl_context context, const
cl_libva_image *info, cl_int *errorcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (!info) { err = CL_INVALID_VALUE; goto error; } mem = cl_mem_new_libva_image(context, info->bo_name, info->offset, info->width, info->height, info->fmt, info->row_pitch, &err); error: if (errorcode_ret) *errorcode_ret = err; return mem; } Release_v0.3/src/cl_command_queue.c000066400000000000000000000325431223142177000174450ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_command_queue.h" #include "cl_context.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_device_id.h" #include "cl_mem.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_driver.h" #include "cl_khr_icd.h" #include #include #include LOCAL cl_command_queue cl_command_queue_new(cl_context ctx) { cl_command_queue queue = NULL; assert(ctx); TRY_ALLOC_NO_ERR (queue, CALLOC(struct _cl_command_queue)); SET_ICD(queue->dispatch) queue->magic = CL_MAGIC_QUEUE_HEADER; queue->ref_n = 1; queue->ctx = ctx; TRY_ALLOC_NO_ERR (queue->gpgpu, cl_gpgpu_new(ctx->drv)); /* Append the command queue in the list */ pthread_mutex_lock(&ctx->queue_lock); queue->next = ctx->queues; if (ctx->queues != NULL) ctx->queues->prev = queue; ctx->queues = queue; pthread_mutex_unlock(&ctx->queue_lock); /* The queue also belongs to its context */ cl_context_add_ref(ctx); exit: return queue; error: cl_command_queue_delete(queue); queue = NULL; goto exit; } LOCAL void cl_command_queue_delete(cl_command_queue queue) { assert(queue); if (atomic_dec(&queue->ref_n) != 1) return; /* Remove it from the list */ assert(queue->ctx); pthread_mutex_lock(&queue->ctx->queue_lock); if (queue->prev) queue->prev->next = queue->next; if (queue->next) queue->next->prev = queue->prev; if (queue->next == NULL && queue->prev == NULL) queue->ctx->queues = NULL; pthread_mutex_unlock(&queue->ctx->queue_lock); if (queue->fulsim_out != NULL) { cl_mem_delete(queue->fulsim_out); queue->fulsim_out = NULL; } cl_mem_delete(queue->perf); cl_context_delete(queue->ctx); cl_gpgpu_delete(queue->gpgpu); cl_free(queue->wait_events); queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(queue); } LOCAL void cl_command_queue_add_ref(cl_command_queue queue) { atomic_inc(&queue->ref_n); } static void set_image_info(char *curbe, struct ImageInfo * image_info, struct _cl_mem_image *image) { if (image_info->wSlot >= 0) *(uint32_t*)(curbe + image_info->wSlot) = image->w; if 
(image_info->hSlot >= 0) *(uint32_t*)(curbe + image_info->hSlot) = image->h; if (image_info->depthSlot >= 0) *(uint32_t*)(curbe + image_info->depthSlot) = image->depth; if (image_info->channelOrderSlot >= 0) *(uint32_t*)(curbe + image_info->channelOrderSlot) = image->fmt.image_channel_order; if (image_info->dataTypeSlot >= 0) *(uint32_t*)(curbe + image_info->dataTypeSlot) = image->fmt.image_channel_data_type; } LOCAL cl_int cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) { uint32_t i; for (i = 0; i < k->image_sz; i++) { int id = k->images[i].arg_idx; struct _cl_mem_image *image; assert(gbe_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE); image = cl_mem_image(k->args[id].mem); set_image_info(k->curbe, &k->images[i], image); cl_gpgpu_bind_image(queue->gpgpu, k->images[i].idx, image->base.bo, image->offset, image->intel_fmt, image->image_type, image->w, image->h, image->depth, image->row_pitch, image->tiling); } return CL_SUCCESS; } LOCAL cl_int cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) { /* Bind all user buffers (given by clSetKernelArg) */ uint32_t i; enum gbe_arg_type arg_type; /* kind of argument */ for (i = 0; i < k->arg_n; ++i) { uint32_t offset; // location of the address in the curbe arg_type = gbe_kernel_get_arg_type(k->opaque, i); if (arg_type != GBE_ARG_GLOBAL_PTR || !k->args[i].mem) continue; offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i); cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3); } return CL_SUCCESS; } #if USE_FULSIM extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr); extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*); extern void aub_exec_dump_raw_file(cl_buffer, size_t offset, size_t sz); static void cl_run_fulsim(void) { const char *run_it = getenv("OCL_SIMULATOR"); const char *debug_mode = getenv("OCL_FULSIM_DEBUG_MODE"); if (run_it == NULL || strcmp(run_it, "1")) return; #if EMULATE_GEN == 7 /* IVB */ if (debug_mode == 
NULL || strcmp(debug_mode, "1")) system("wine AubLoad.exe dump.aub -device ivbB0"); else system("wine AubLoad.exe dump.aub -device ivbB0 -debug"); #elif EMULATE_GEN == 75 /* HSW */ if (debug_mode == NULL || strcmp(debug_mode, "1")) system("wine AubLoad.exe dump.aub -device hsw.h.a0"); else system("wine AubLoad.exe dump.aub -device hsw.h.a0 -debug"); #else #error "Unknown device" #endif } /* Each buffer is dump using several chunks of this size */ static const size_t chunk_sz = 8192u; static cl_int cl_fulsim_dump_all_surfaces(cl_command_queue queue, cl_kernel k) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; int i; size_t j; /* Bind user defined surface */ for (i = 0; i < k->arg_n; ++i) { size_t chunk_n, chunk_remainder; if (gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_GLOBAL_PTR) continue; mem = (cl_mem) k->args[i].mem; CHECK_MEM(mem); chunk_n = cl_buffer_get_size(mem->bo) / chunk_sz; chunk_remainder = cl_buffer_get_size(mem->bo) % chunk_sz; for (j = 0; j < chunk_n; ++j) aub_exec_dump_raw_file(mem->bo, j * chunk_sz, chunk_sz); if (chunk_remainder) aub_exec_dump_raw_file(mem->bo, chunk_n * chunk_sz, chunk_remainder); } error: return err; } struct bmphdr { /* 2 bytes of magic here, "BM", total header size is 54 bytes! 
/* Load an uncompressed BMP file that stores 3 bytes (b, g, r) per pixel and
 * expand it to one 32-bit word per pixel laid out as 0xff, b, g, r from the
 * most to the least significant byte.
 *
 * filename: path of the BMP file to read
 * width, height: receive the image dimensions taken from the header
 *
 * Returns a buffer of width * height ints allocated with cl_malloc (the caller
 * releases it with cl_free), or NULL when the file cannot be opened, is not a
 * supported BMP or when the allocation fails. Debug builds still assert on
 * those conditions, as before.
 *
 * NOTE(review): hdr.bpp is not checked; the pixel loop assumes 24 bits per
 * pixel and that the pixel data directly follows the 54-byte header.
 */
static int*
cl_read_bmp(const char *filename, int *width, int *height)
{
  struct bmphdr hdr;
  char magic[2];
  int *rgb32 = NULL, *dst = NULL;
  int x, y, pad;
  FILE *fp = fopen(filename, "rb");

  assert(fp);
  if (fp == NULL)
    return NULL;

  /* The 2-byte "BM" magic is read on its own: struct bmphdr describes the
   * header starting right after it */
  if (fread(magic, 1, sizeof(magic), fp) != sizeof(magic) ||
      magic[0] != 'B' || magic[1] != 'M' ||
      fread(&hdr, 1, sizeof(hdr), fp) != sizeof(hdr) ||
      hdr.width <= 0 || hdr.height <= 0 ||
      hdr.nplanes != 1 || hdr.compression != 0)
    goto bad_file;

  /* Multiply in size_t so that large images cannot overflow an int */
  rgb32 = (int *) cl_malloc((size_t) hdr.width * (size_t) hdr.height * sizeof(int));
  if (rgb32 == NULL)
    goto bad_file;

  /* Scan lines are padded to a multiple of 4 bytes. With 3 bytes per pixel
   * the padding is (4 - (3 * width) % 4) % 4, which equals width % 4 */
  pad = hdr.width & 3;

  dst = rgb32;
  for (y = 0; y < hdr.height; y++) {
    for (x = 0; x < hdr.width; x++) {
      int b, g, r;
      assert(!feof(fp));
      b = (getc(fp) & 0x0ff);
      g = (getc(fp) & 0x0ff);
      r = (getc(fp) & 0x0ff);
      *dst++ = (r | (g << 8) | (b << 16) | 0xff000000); /* abgr */
    }
    for (x = 0; x < pad; x++)
      (void) getc(fp);
  }
  fclose(fp);
  *width = hdr.width;
  *height = hdr.height;
  return rgb32;

bad_file:
  assert(0 && "cannot read BMP file");
  fclose(fp);
  return NULL;
}

/* Read a simulator dump stored as a BMP file and keep one byte per pixel: the
 * first byte of each 32-bit word produced by cl_read_bmp.
 *
 * name: path of the BMP file
 * size: when not NULL, receives the number of bytes returned (width * height)
 *
 * Returns a buffer allocated with cl_malloc (released by the caller with
 * cl_free) or NULL when the file cannot be read or the allocation fails.
 */
static char*
cl_read_dump(const char *name, size_t *size)
{
  char *raw = NULL, *dump = NULL;
  size_t i, sz;
  int w, h;

  if ((raw = (char*) cl_read_bmp(name, &w, &h)) == NULL)
    return NULL;

  sz = (size_t) w * (size_t) h;
  dump = (char*) cl_malloc(sz);
  assert(dump);
  if (dump != NULL) {
    for (i = 0; i < sz; ++i)
      dump[i] = raw[4*i];
  }
  cl_free(raw);
  if (dump == NULL)
    return NULL;

  if (size)
    *size = sz;
  return dump;
}
*to = NULL; size_t size, j, chunk_n, chunk_remainder; int i, curr = 0; /* Bind user defined surface */ for (i = 0; i < k->arg_n; ++i) { if (gbe_kernel_get_arg_type(k->opaque, i) != GBE_ARG_GLOBAL_PTR) continue; mem = (cl_mem) k->args[i].mem; CHECK_MEM(mem); assert(mem->bo); chunk_n = cl_buffer_get_size(mem->bo) / chunk_sz; chunk_remainder = cl_buffer_get_size(mem->bo) % chunk_sz; to = cl_mem_map(mem); for (j = 0; j < chunk_n; ++j) { char name[256]; sprintf(name, "dump%03i.bmp", curr); #ifdef NDEBUG from = cl_read_dump(name, NULL); #else from = cl_read_dump(name, &size); assert(size == chunk_sz); #endif /* NDEBUG */ memcpy(to + j*chunk_sz, from, chunk_sz); cl_free(from); curr++; } if (chunk_remainder) { char name[256]; sprintf(name, "dump%03i.bmp", curr); #ifdef NDEBUG from = cl_read_dump(name, NULL); #else from = cl_read_dump(name, &size); assert(size == chunk_remainder); #endif /* NDEBUG */ memcpy(to + chunk_n*chunk_sz, from, chunk_remainder); cl_free(from); curr++; } cl_mem_unmap(mem); } error: return err; } #endif extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue, cl_kernel, uint32_t, const size_t *, const size_t *, const size_t *); static cl_int cl_kernel_check_args(cl_kernel k) { uint32_t i; for (i = 0; i < k->arg_n; ++i) if (k->args[i].is_set == CL_FALSE) return CL_INVALID_KERNEL_ARGS; return CL_SUCCESS; } LOCAL cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel k, const uint32_t work_dim, const size_t *global_wk_off, const size_t *global_wk_sz, const size_t *local_wk_sz) { const int32_t ver = cl_driver_get_ver(queue->ctx->drv); cl_int err = CL_SUCCESS; /* Check that the user did not forget any argument */ TRY (cl_kernel_check_args, k); #if USE_FULSIM cl_buffer_mgr bufmgr = NULL; FILE *file = NULL; const char *run_it = getenv("OCL_SIMULATOR"); if (run_it != NULL && strcmp(run_it, "1") == 0) { file = fopen("dump.aub", "wb"); FATAL_IF (file == NULL, "Unable to open file dump.aub"); bufmgr = cl_context_get_bufmgr(queue->ctx); 
drm_intel_bufmgr_gem_set_aubfile(bufmgr, file); } #endif /* USE_FULSIM */ if (ver == 7 || ver == 75) TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim, global_wk_off, global_wk_sz, local_wk_sz); else FATAL ("Unknown Gen Device"); #if USE_FULSIM if (run_it != NULL && strcmp(run_it, "1") == 0) { TRY (cl_fulsim_dump_all_surfaces, queue, k); drm_intel_bufmgr_gem_stop_aubfile(bufmgr); fclose(file); cl_run_fulsim(); TRY (cl_fulsim_read_all_surfaces, queue, k); } #endif /* USE_FULSIM */ error: return err; } LOCAL cl_int cl_command_queue_flush(cl_command_queue queue) { cl_gpgpu_flush(queue->gpgpu); return CL_SUCCESS; } LOCAL cl_int cl_command_queue_finish(cl_command_queue queue) { cl_gpgpu_sync(queue->gpgpu); return CL_SUCCESS; } #define DEFAULT_WAIT_EVENTS_SIZE 16 LOCAL void cl_command_queue_insert_event(cl_command_queue queue, cl_event event) { cl_int i=0; cl_event *new_list; assert(queue != NULL); if(queue->wait_events == NULL) { queue->wait_events_size = DEFAULT_WAIT_EVENTS_SIZE; TRY_ALLOC_NO_ERR (queue->wait_events, CALLOC_ARRAY(cl_event, queue->wait_events_size)); } for(i=0; iwait_events_num; i++) { if(queue->wait_events[i] == event) return; //is in the wait_events, need to insert } if(queue->wait_events_num < queue->wait_events_size) { queue->wait_events[queue->wait_events_num++] = event; return; } //wait_events_num == wait_events_size, array is full queue->wait_events_size *= 2; TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->wait_events_size)); memcpy(new_list, queue->wait_events, sizeof(cl_event)*queue->wait_events_num); cl_free(queue->wait_events); queue->wait_events = new_list; queue->wait_events[queue->wait_events_num++] = event; return; exit: return; error: if(queue->wait_events) cl_free(queue->wait_events); queue->wait_events = NULL; queue->wait_events_size = 0; queue->wait_events_num = 0; goto exit; } LOCAL void cl_command_queue_remove_event(cl_command_queue queue, cl_event event) { cl_int i=0; assert(queue->wait_events); for(i=0; 
iwait_events_num; i++) { if(queue->wait_events[i] == event) break; } if(i == queue->wait_events_num) return; if(queue->barrier_index >= i) queue->barrier_index -= 1; for(; iwait_events_num-1; i++) { queue->wait_events[i] = queue->wait_events[i+1]; } queue->wait_events_num -= 1; } LOCAL void cl_command_queue_set_barrier(cl_command_queue queue) { queue->barrier_index = queue->wait_events_num; } Release_v0.3/src/cl_command_queue.h000066400000000000000000000077111223142177000174510ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
/* Basically, this is a (kind-of) batch buffer.
 *
 * wait_events is a dynamically sized array: wait_events_size is its capacity
 * and wait_events_num the number of entries in use. barrier_index is the
 * number of leading entries of wait_events that are barrier events; it is set
 * to wait_events_num by cl_command_queue_set_barrier.
 */
struct _cl_command_queue {
  DEFINE_ICD(dispatch)
  uint64_t magic;                      /* To identify it as a command queue */
  volatile int ref_n;                  /* We reference count this object */
  cl_context ctx;                      /* Its parent context */
  cl_event* wait_events;               /* Array of non-complete user events that block this command queue */
  cl_int    wait_events_num;           /* Number of non-complete user events stored in wait_events */
  cl_int    wait_events_size;          /* Capacity, in entries, of the array wait_events points to */
  cl_int    barrier_index;             /* Number of events in wait_events that are barrier events */
  cl_event  last_event;                /* The last event in the queue, used by enqueue marker */
  cl_command_queue_properties  props;  /* Queue properties */
  cl_command_queue prev, next;         /* We chain the command queues together */
  cl_gpgpu gpgpu;                      /* Setup all GEN commands */
  cl_mem perf;                         /* Where to put the perf counters */
  cl_mem fulsim_out;                   /* Fulsim will output this buffer */
};
Also insert it in the list of * command queue in the associated context */ extern cl_command_queue cl_command_queue_new(cl_context); /* Destroy and deallocate the command queue */ extern void cl_command_queue_delete(cl_command_queue); /* Keep one more reference on the queue */ extern void cl_command_queue_add_ref(cl_command_queue); /* Map ND range kernel from OCL API */ extern cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel ker, const uint32_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size); /* The memory object where to report the performance */ extern cl_int cl_command_queue_set_report_buffer(cl_command_queue, cl_mem); /* Fulsim will dump this buffer (mostly to check its consistency */ cl_int cl_command_queue_set_fulsim_buffer(cl_command_queue, cl_mem); /* Flush for the command queue */ extern cl_int cl_command_queue_flush(cl_command_queue); /* Wait for the completion of the command queue */ extern cl_int cl_command_queue_finish(cl_command_queue); /* Bind all the surfaces in the GPGPU state */ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel); /* Bind all the image surfaces in the GPGPU state */ extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel); /* Insert a user event to command's wait_events */ extern void cl_command_queue_insert_event(cl_command_queue, cl_event); /* Remove a user event from command's wait_events */ extern void cl_command_queue_remove_event(cl_command_queue, cl_event); /* Set the barrier index */ extern void cl_command_queue_set_barrier(cl_command_queue); #endif /* __CL_COMMAND_QUEUE_H__ */ Release_v0.3/src/cl_command_queue_gen7.c000066400000000000000000000270301223142177000203600ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software 
Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_command_queue.h" #include "cl_context.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_device_id.h" #include "cl_mem.h" #include "cl_utils.h" #include "cl_alloc.h" #include #include #include static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+32; } /* "Varing" payload is the part of the curbe that changes accross threads in the * same work group. Right now, it consists in local IDs and block IPs */ static cl_int cl_set_varying_payload(const cl_kernel ker, char *data, const size_t *local_wk_sz, size_t simd_sz, size_t cst_sz, size_t thread_n) { uint32_t *ids[3] = {NULL,NULL,NULL}; uint16_t *block_ips = NULL; size_t i, j, k, curr = 0; int32_t id_offset[3], ip_offset; cl_int err = CL_SUCCESS; id_offset[0] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0); id_offset[1] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0); id_offset[2] = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0); ip_offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0); assert(id_offset[0] >= 0 && id_offset[1] >= 0 && id_offset[2] >= 0 && ip_offset >= 0); TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz)); /* 0xffff means that the lane is inactivated */ 
memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz); /* Compute the IDs and the block IPs */ for (k = 0; k < local_wk_sz[2]; ++k) for (j = 0; j < local_wk_sz[1]; ++j) for (i = 0; i < local_wk_sz[0]; ++i, ++curr) { ids[0][curr] = i; ids[1][curr] = j; ids[2][curr] = k; block_ips[curr] = 0; } /* Copy them to the curbe buffer */ curr = 0; for (i = 0; i < thread_n; ++i, data += cst_sz) { uint32_t *ids0 = (uint32_t *) (data + id_offset[0]); uint32_t *ids1 = (uint32_t *) (data + id_offset[1]); uint32_t *ids2 = (uint32_t *) (data + id_offset[2]); uint16_t *ips = (uint16_t *) (data + ip_offset); for (j = 0; j < simd_sz; ++j, ++curr) { ids0[j] = ids[0][curr]; ids1[j] = ids[1][curr]; ids2[j] = ids[2][curr]; ips[j] = block_ips[curr]; } } error: return err; } static void cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker) { /* calculate constant buffer size */ int32_t arg; size_t offset; gbe_program prog = ker->program->opaque; const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque); size_t global_const_size = gbe_program_get_global_constant_size(prog); uint32_t constant_buf_size = 0; for (arg = 0; arg < arg_n; ++arg) { const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg); if (type == GBE_ARG_CONSTANT_PTR && ker->args[arg].mem) { cl_mem mem = ker->args[arg].mem; constant_buf_size += ALIGN(mem->size, 4); } } if(global_const_size == 0 && constant_buf_size == 0) return; cl_buffer bo = cl_gpgpu_alloc_constant_buffer(queue->gpgpu, constant_buf_size + global_const_size + 4); cl_buffer_map(bo, 1); char * cst_addr = cl_buffer_get_virtual(bo); offset = 0; if (global_const_size > 0) { /* Write the global constant arrays */ gbe_program_get_global_constant_data(prog, (char*)(cst_addr+offset)); } offset += ALIGN(global_const_size, 4); if(global_const_size == 0) { /* reserve 4 bytes to get rid of 0 address */ offset += 4; } /* upload constant buffer argument */ int32_t curbe_offset = 0; for (arg = 0; arg < arg_n; ++arg) { const enum gbe_arg_type type 
/* Fill the part of the kernel curbe (ker->curbe) that is shared by all the
 * threads of a work group: work sizes, offsets, group counts, thread count,
 * work dimension, sampler information, the identity stack pointers and the
 * SLM offset of every __local pointer argument.
 *
 * Each value is only written when the kernel reports a non-negative curbe
 * offset for it.
 *
 * Returns the total amount of SLM used: the SLM size reported for the kernel
 * rounded up with ALIGN(..., 32), plus the local_sz of every local pointer
 * argument.
 */
static int32_t
cl_curbe_fill(cl_kernel ker,
              const uint32_t work_dim,
              const size_t *global_wk_off,
              const size_t *global_wk_sz,
              const size_t *local_wk_sz,
              size_t thread_n)
{
  int32_t offset;
  /* UPLOAD stores VALUE as a 32-bit word at the curbe offset of ENUM, and does
   * nothing when that offset is negative */
#define UPLOAD(ENUM, VALUE) \
  if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, ENUM, 0)) >= 0) \
    *((uint32_t *) (ker->curbe + offset)) = VALUE;
  UPLOAD(GBE_CURBE_LOCAL_SIZE_X, local_wk_sz[0]);
  UPLOAD(GBE_CURBE_LOCAL_SIZE_Y, local_wk_sz[1]);
  UPLOAD(GBE_CURBE_LOCAL_SIZE_Z, local_wk_sz[2]);
  UPLOAD(GBE_CURBE_GLOBAL_SIZE_X, global_wk_sz[0]);
  UPLOAD(GBE_CURBE_GLOBAL_SIZE_Y, global_wk_sz[1]);
  UPLOAD(GBE_CURBE_GLOBAL_SIZE_Z, global_wk_sz[2]);
  UPLOAD(GBE_CURBE_GLOBAL_OFFSET_X, global_wk_off[0]);
  UPLOAD(GBE_CURBE_GLOBAL_OFFSET_Y, global_wk_off[1]);
  UPLOAD(GBE_CURBE_GLOBAL_OFFSET_Z, global_wk_off[2]);
  /* Number of work groups per dimension (integer division) */
  UPLOAD(GBE_CURBE_GROUP_NUM_X, global_wk_sz[0]/local_wk_sz[0]);
  UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]);
  UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]);
  UPLOAD(GBE_CURBE_THREAD_NUM, thread_n);
  UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
#undef UPLOAD

  /* Upload sampler information: the low 8 bits of each sampler value, stored
   * as consecutive 16-bit entries */
  offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_SAMPLER_INFO, 0);
  if (offset >= 0) {
    uint32_t i;
    for(i = 0; i < ker->sampler_sz; i++, offset += 2) {
      *((uint16_t *) (ker->curbe + offset)) = ker->samplers[i] & 0xFF;
    }
  }

  /* Write identity for the stack pointer (lane i receives the value i). This
   * is required by the stack pointer computation in the kernel
   */
  if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_STACK_POINTER, 0)) >= 0) {
    const uint32_t simd_sz = gbe_kernel_get_simd_width(ker->opaque);
    uint32_t *stackptr = (uint32_t *) (ker->curbe + offset);
    int32_t i;
    for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
  }

  /* Handle the various offsets to SLM */
  const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
  /* Align the starting offset so that the local pointer arguments get a good
   * alignment */
  int32_t arg, slm_offset = ALIGN(gbe_kernel_get_slm_size(ker->opaque), 32);
  for (arg = 0; arg < arg_n; ++arg) {
    const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
    if (type != GBE_ARG_LOCAL_PTR)
      continue;
    offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg);
    assert(offset >= 0);
    /* The argument value seen by the kernel is its offset in SLM; the next
     * local argument is placed right after it */
    uint32_t *slmptr = (uint32_t *) (ker->curbe + offset);
    *slmptr = slm_offset;
    slm_offset += ker->args[arg].local_sz;
  }

  return slm_offset;
}
/* Enqueue an ND range kernel on a Gen7 / Gen7.5 device.
 *
 * queue: command queue providing the context and the GPGPU state
 * ker: kernel to run
 * work_dim: number of dimensions of the ND range
 * global_wk_off, global_wk_sz, local_wk_sz: offsets, global sizes and
 *   work-group sizes; entries 0, 1 and 2 are all read
 *
 * The function fills the shared part of the curbe, initializes the GPGPU
 * state, binds buffers, images, samplers, scratch, stack and constant buffer,
 * uploads one curbe per hardware thread and finally emits the walker command
 * in a fresh batch buffer.
 *
 * Returns CL_SUCCESS, CL_OUT_OF_RESOURCES when the kernel needs more SLM than
 * the device local_mem_size, or the error reported through TRY / TRY_ALLOC.
 */
LOCAL cl_int
cl_command_queue_ND_range_gen7(cl_command_queue queue,
                               cl_kernel ker,
                               const uint32_t work_dim,
                               const size_t *global_wk_off,
                               const size_t *global_wk_sz,
                               const size_t *local_wk_sz)
{
  cl_context ctx = queue->ctx;
  cl_gpgpu gpgpu = queue->gpgpu;
  char *final_curbe = NULL;  /* One copy of the curbe per hardware thread */
  cl_gpgpu_kernel kernel;
  const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
  size_t i, batch_sz = 0u, local_sz = 0u;
  /* The curbe size is also cached in the kernel object */
  size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
  size_t thread_n = 0u;
  cl_int err = CL_SUCCESS;

  /* Setup kernel */
  kernel.name = "KERNEL";
  kernel.grf_blocks = 128;
  kernel.bo = ker->bo;
  kernel.barrierID = 0;
  kernel.slm_sz = 0;
  kernel.use_slm = gbe_kernel_use_slm(ker->opaque);

  /* Compute the number of HW threads we need: the work-group size divided by
   * the SIMD width, rounded up */
  TRY (cl_kernel_work_group_sz, ker, local_wk_sz, 3, &local_sz);
  kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
  kernel.curbe_sz = cst_sz;

  /* Curbe step 1: fill the constant urb buffer data shared by all threads */
  if (ker->curbe) {
    kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n);
    /* Returns directly: nothing has been allocated or bound yet */
    if (kernel.slm_sz > ker->program->ctx->device->local_mem_size)
      return CL_OUT_OF_RESOURCES;
  }

  /* Setup the kernel. The curbe size is given in 32-byte units; the last
   * argument is 1 only when profiling is enabled on the queue */
  if (queue->props & CL_QUEUE_PROFILING_ENABLE)
    cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 1);
  else
    cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 0);

  /* Bind user buffers */
  cl_command_queue_bind_surface(queue, ker);
  /* Bind user images */
  cl_command_queue_bind_image(queue, ker);
  /* Bind all samplers */
  cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->sampler_sz);

  cl_setup_scratch(gpgpu, ker);
  /* Bind a stack if needed */
  cl_bind_stack(gpgpu, ker);
  cl_upload_constant_buffer(queue, ker);

  cl_gpgpu_states_setup(gpgpu, &kernel);

  /* Curbe step 2. Give the localID and upload it to video memory */
  if (ker->curbe) {
    assert(cst_sz > 0);
    TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
    /* Every thread starts from the shared curbe filled in step 1 ... */
    for (i = 0; i < thread_n; ++i) {
        memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
    }
    /* ... and then receives its own local IDs and block IPs */
    TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
    cl_gpgpu_upload_curbes(gpgpu, final_curbe, thread_n*cst_sz);
  }

  /* Start a new batch buffer */
  batch_sz = cl_kernel_compute_batch_sz(ker);
  cl_gpgpu_batch_reset(gpgpu, batch_sz);
  cl_gpgpu_batch_start(gpgpu);

  /* Issue the GPGPU_WALKER command */
  cl_gpgpu_walker(gpgpu, simd_sz, thread_n, global_wk_off, global_wk_sz, local_wk_sz);

  /* Close the batch buffer and submit it */
  cl_gpgpu_batch_end(gpgpu, 0);

error:
  return err;
}
* * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_command_queue.h" #include "cl_mem.h" #include "cl_alloc.h" #include "cl_utils.h" #include "cl_driver.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include "cl_program.h" #include "CL/cl.h" #include "CL/cl_gl.h" #include #include #include #include #include #define CHECK(var) \ if (var) \ return CL_INVALID_PROPERTY; \ else \ var = 1; static cl_int cl_context_properties_process(const cl_context_properties *prop, struct _cl_context_prop *cl_props, cl_uint * prop_len) { int set_cl_context_platform = 0, set_cl_gl_context_khr = 0, set_cl_egl_display_khr = 0, set_cl_glx_display_khr = 0, set_cl_wgl_hdc_khr = 0, set_cl_cgl_sharegroup_khr = 0; cl_int err = CL_SUCCESS; cl_props->gl_type = CL_GL_NOSHARE; cl_props->platform_id = 0; if (prop == NULL) goto exit; while(*prop) { switch (*prop) { case CL_CONTEXT_PLATFORM: CHECK (set_cl_context_platform); cl_props->platform_id = *(prop + 1); if (UNLIKELY((cl_platform_id) cl_props->platform_id != intel_platform)) { err = CL_INVALID_PLATFORM; goto error; } break; case CL_GL_CONTEXT_KHR: CHECK (set_cl_gl_context_khr); cl_props->gl_context = *(prop + 1); break; case CL_EGL_DISPLAY_KHR: CHECK (set_cl_egl_display_khr); cl_props->gl_type = CL_GL_EGL_DISPLAY; cl_props->egl_display = *(prop + 1); break; case CL_GLX_DISPLAY_KHR: CHECK (set_cl_glx_display_khr); cl_props->gl_type = CL_GL_GLX_DISPLAY; cl_props->glx_display = *(prop + 1); break; case CL_WGL_HDC_KHR: CHECK (set_cl_wgl_hdc_khr); cl_props->gl_type = CL_GL_WGL_HDC; cl_props->wgl_hdc = *(prop + 1); break; case CL_CGL_SHAREGROUP_KHR: CHECK (set_cl_cgl_sharegroup_khr); cl_props->gl_type = CL_GL_CGL_SHAREGROUP; cl_props->cgl_sharegroup = *(prop + 1); break; default: err = CL_INVALID_PROPERTY; goto error; } prop += 2; *prop_len += 2; } exit: error: return err; } LOCAL cl_context cl_create_context(const cl_context_properties * properties, cl_uint num_devices, const 
/* Allocate and initialize a context.
 *
 * props: decoded context properties; they are copied into the context and
 *   handed to cl_driver_new
 *
 * Returns the new context, holding one reference, or NULL when the context or
 * its driver cannot be allocated.
 */
LOCAL cl_context
cl_context_new(struct _cl_context_prop *props)
{
  cl_context ctx = NULL;

  TRY_ALLOC_NO_ERR (ctx, CALLOC(struct _cl_context));
  TRY_ALLOC_NO_ERR (ctx->drv, cl_driver_new(props));
  SET_ICD(ctx->dispatch)
  ctx->props = *props;
  ctx->magic = CL_MAGIC_CONTEXT_HEADER;
  ctx->ref_n = 1;
  ctx->ver = cl_driver_get_ver(ctx->drv);
  /* Every lock declared in struct _cl_context is initialized here */
  pthread_mutex_init(&ctx->program_lock, NULL);
  pthread_mutex_init(&ctx->queue_lock, NULL);
  pthread_mutex_init(&ctx->buffer_lock, NULL);
  pthread_mutex_init(&ctx->sampler_lock, NULL);
  pthread_mutex_init(&ctx->event_lock, NULL);

exit:
  return ctx;
error:
  /* Only the two allocations above can fail, so at this point the context is
   * either NULL or has no driver and no reference yet. It is released
   * directly: cl_context_delete expects a fully built context (it asserts
   * that ctx->drv is set) */
  if (ctx != NULL)
    cl_free(ctx);
  ctx = NULL;
  goto exit;
}
Otherwise, the reference counter * of the context cannot be 0 */ assert(ctx->queues == NULL); assert(ctx->programs == NULL); assert(ctx->buffers == NULL); assert(ctx->drv); cl_free(ctx->prop_user); cl_driver_delete(ctx->drv); ctx->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(ctx); } LOCAL void cl_context_add_ref(cl_context ctx) { assert(ctx); atomic_inc(&ctx->ref_n); } LOCAL cl_command_queue cl_context_create_queue(cl_context ctx, cl_device_id device, cl_command_queue_properties properties, /* XXX */ cl_int *errcode_ret) { cl_command_queue queue = NULL; cl_int err = CL_SUCCESS; /* We create the command queue and store it in the context list of queues */ TRY_ALLOC (queue, cl_command_queue_new(ctx)); queue->props = properties; exit: if (errcode_ret) *errcode_ret = err; return queue; error: cl_command_queue_delete(queue); queue = NULL; goto exit; } cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx) { return cl_driver_get_bufmgr(ctx->drv); } cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kernel, const char * str_option) { cl_int ret; if (!ctx->internal_prgs[index]) { size_t length = strlen(str_kernel) + 1; ctx->internal_prgs[index] = cl_program_create_from_source(ctx, 1, &str_kernel, &length, NULL); if (!ctx->internal_prgs[index]) return NULL; ret = cl_program_build(ctx->internal_prgs[index], str_option); if (ret != CL_SUCCESS) return NULL; ctx->internal_prgs[index]->is_built = 1; ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); } return ctx->internel_kernels[index]; } cl_kernel cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, const char * str_kernel, size_t size, const char * str_option) { cl_int ret; cl_int binary_status = CL_SUCCESS; if (!ctx->internal_prgs[index]) { ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device, &size, (const unsigned char **)&str_kernel, &binary_status, &ret); if (!ctx->internal_prgs[index]) return NULL; ret = 
cl_program_build(ctx->internal_prgs[index], str_option); if (ret != CL_SUCCESS) return NULL; ctx->internal_prgs[index]->is_built = 1; ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); } return ctx->internel_kernels[index]; } Release_v0.3/src/cl_context.h000066400000000000000000000141121223142177000163040ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
/* Encapsulate the whole device: the driver, the device description, the
 * lists of live objects with the lock guarding each of them, the cache of
 * internal programs / kernels and the user supplied properties and callback.
 */
struct _cl_context {
  DEFINE_ICD(dispatch)
  uint64_t magic;                   /* To identify it as a context */
  volatile int ref_n;               /* We reference count this object */
  cl_driver drv;                    /* Handles HW or simulator */
  cl_device_id device;              /* All information about the GPU device */
  cl_command_queue queues;          /* All command queues currently allocated */
  cl_program programs;              /* All programs currently allocated */
  cl_mem buffers;                   /* All memory objects currently allocated */
  cl_sampler samplers;              /* All sampler objects currently allocated */
  cl_event   events;                /* All event objects currently allocated */
  pthread_mutex_t queue_lock;       /* To allocate and deallocate queues */
  pthread_mutex_t program_lock;     /* To allocate and deallocate programs */
  pthread_mutex_t buffer_lock;      /* To allocate and deallocate buffers */
  pthread_mutex_t sampler_lock;     /* To allocate and deallocate samplers */
  pthread_mutex_t event_lock;       /* To allocate and deallocate events */
  cl_program internal_prgs[CL_INTERNAL_KERNEL_MAX];
                                    /* Programs used internally, e.g. by the clEnqueueXXX
                                       entry points; indexed by enum _cl_internal_ker_type */
  cl_kernel  internel_kernels[CL_INTERNAL_KERNEL_MAX];
                                    /* Kernels of the internal programs, same indexing
                                       as internal_prgs */
  uint32_t ver;                     /* Gen version */
  struct _cl_context_prop props;    /* Decoded context properties */
  cl_context_properties * prop_user; /* A copy of the properties the user passed at context creation */
  cl_uint                 prop_len;  /* Number of entries in prop_user */
  void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *);
                                     /* User's callback, called when an error occurs in the context */
  void *user_data;                   /* A pointer to user supplied data */
};
/* Get the internal used kernel */ extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option); /* Get the internal used kernel from binary*/ extern cl_kernel cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, const char * str_kernel, size_t size, const char * str_option); #endif /* __CL_CONTEXT_H__ */ Release_v0.3/src/cl_device_data.h000066400000000000000000000073061223142177000170570ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_DEVICE_DATA_H__ #define __CL_DEVICE_DATA_H__ #define PCI_CHIP_GM45_GM 0x2A42 #define PCI_CHIP_IGD_E_G 0x2E02 #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 #define PCI_CHIP_IGDNG_D_G 0x0042 #define PCI_CHIP_IGDNG_M_G 0x0046 #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ devid == PCI_CHIP_G41_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) #define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G) #define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G) #define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid)) #ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE #define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */ #define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 #define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 #define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 #define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */ #define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 #define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 #define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 #define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */ #define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A #endif #define IS_GEN6(devid) \ (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ devid == PCI_CHIP_SANDYBRIDGE_S_GT) #define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */ #define PCI_CHIP_IVYBRIDGE_GT2 0x0162 #define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */ #define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 #define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */ #define IS_IVB_GT1(devid) \ (devid == PCI_CHIP_IVYBRIDGE_GT1 || \ devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \ devid == PCI_CHIP_IVYBRIDGE_S_GT1) #define IS_IVB_GT2(devid) \ (devid == PCI_CHIP_IVYBRIDGE_GT2 || \ 
devid == PCI_CHIP_IVYBRIDGE_M_GT2) #define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid)) #define IS_GEN7(devid) IS_IVYBRIDGE(devid) #define PCI_CHIP_HASWELL_M0 0x0094 #define PCI_CHIP_HASWELL_D0 0x0090 #define PCI_CHIP_HASWELL_M 0x0091 #define PCI_CHIP_HASWELL_L 0x0092 #define IS_HASWELL(devid) ((devid) == PCI_CHIP_HASWELL_M0 || \ (devid) == PCI_CHIP_HASWELL_D0 || \ (devid) == PCI_CHIP_HASWELL_M || \ (devid) == PCI_CHIP_HASWELL_L) #define IS_GEN75(devid) IS_HASWELL(devid) #endif /* __CL_DEVICE_DATA_H__ */ Release_v0.3/src/cl_device_id.c000066400000000000000000000261001223142177000165260ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_internals.h" #include "cl_utils.h" #include "cl_driver.h" #include "cl_device_data.h" #include "cl_khr_icd.h" #include "CL/cl.h" #include #include #include #ifndef CL_VERSION_1_2 #define CL_DEVICE_BUILT_IN_KERNELS 0x103F #endif static struct _cl_device_id intel_ivb_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 128, .max_thread_per_unit = 8, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 1024, .max_clock_frequency = 1000, .wg_sz = 1024, .compile_wg_sz = {0}, #include "cl_gen7_device.h" }; static struct _cl_device_id intel_ivb_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 64, .max_thread_per_unit = 8, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, .wg_sz = 512, .compile_wg_sz = {0}, #include "cl_gen7_device.h" }; /* XXX we clone IVB for HSW now */ static struct _cl_device_id intel_hsw_device = { INIT_ICD(dispatch) .max_compute_unit = 64, .max_thread_per_unit = 8, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, .wg_sz = 512, .compile_wg_sz = {0}, #include "cl_gen75_device.h" }; LOCAL cl_device_id cl_get_gt_device(void) { cl_device_id ret = NULL; const int device_id = cl_driver_get_device_id(); /* XXX we pick IVB for HSW now */ if (device_id == PCI_CHIP_HASWELL_M || device_id == PCI_CHIP_HASWELL_L || device_id == PCI_CHIP_HASWELL_M0 || device_id == PCI_CHIP_HASWELL_D0) { intel_hsw_device.vendor_id = device_id; intel_hsw_device.platform = intel_platform; ret = &intel_hsw_device; } else if (device_id == PCI_CHIP_IVYBRIDGE_GT1 || device_id == PCI_CHIP_IVYBRIDGE_M_GT1 || device_id == PCI_CHIP_IVYBRIDGE_S_GT1) { intel_ivb_gt1_device.vendor_id = device_id; intel_ivb_gt1_device.platform = intel_platform; ret = &intel_ivb_gt1_device; } else if (device_id == PCI_CHIP_IVYBRIDGE_GT2 || device_id == PCI_CHIP_IVYBRIDGE_M_GT2) { 
intel_ivb_gt2_device.vendor_id = device_id; intel_ivb_gt2_device.platform = intel_platform; ret = &intel_ivb_gt2_device; } return ret; } LOCAL cl_int cl_get_device_ids(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) { cl_device_id device; /* Do we have a usable device? */ device = cl_get_gt_device(); if (!device) { if (num_devices) *num_devices = 0; if (devices) *devices = 0; return CL_DEVICE_NOT_FOUND; } else { if (num_devices) *num_devices = 1; if (devices) { *devices = device; (*devices)->extensions = intel_platform->extensions; (*devices)->extensions_sz = intel_platform->extensions_sz; } return CL_SUCCESS; } } #define DECL_FIELD(CASE,FIELD) \ case JOIN(CL_DEVICE_,CASE): \ if (param_value_size_ret) { \ *param_value_size_ret = sizeof device->FIELD; \ if (!param_value) \ return CL_SUCCESS; \ } \ if (param_value_size < sizeof device->FIELD) \ return CL_INVALID_VALUE; \ memcpy(param_value, &device->FIELD, sizeof device->FIELD); \ return CL_SUCCESS; #define DECL_STRING_FIELD(CASE,FIELD) \ case JOIN(CL_DEVICE_,CASE): \ if (param_value_size_ret) { \ *param_value_size_ret = device->JOIN(FIELD,_sz); \ if (!param_value) \ return CL_SUCCESS; \ } \ if (param_value_size < device->JOIN(FIELD,_sz)) \ return CL_INVALID_VALUE; \ memcpy(param_value, device->FIELD, device->JOIN(FIELD,_sz)); \ return CL_SUCCESS; LOCAL cl_int cl_get_device_info(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (UNLIKELY(device != &intel_ivb_gt1_device && device != &intel_ivb_gt2_device && device != &intel_hsw_device)) return CL_INVALID_DEVICE; /* Find the correct parameter */ switch (param_name) { DECL_FIELD(TYPE, device_type) DECL_FIELD(VENDOR_ID, vendor_id) DECL_FIELD(MAX_COMPUTE_UNITS, max_compute_unit) DECL_FIELD(MAX_WORK_ITEM_DIMENSIONS, max_work_item_dimensions) DECL_FIELD(MAX_WORK_ITEM_SIZES, max_work_item_sizes) 
DECL_FIELD(MAX_WORK_GROUP_SIZE, max_work_group_size) DECL_FIELD(PREFERRED_VECTOR_WIDTH_CHAR, preferred_vector_width_char) DECL_FIELD(PREFERRED_VECTOR_WIDTH_SHORT, preferred_vector_width_short) DECL_FIELD(PREFERRED_VECTOR_WIDTH_INT, preferred_vector_width_int) DECL_FIELD(PREFERRED_VECTOR_WIDTH_LONG, preferred_vector_width_long) DECL_FIELD(PREFERRED_VECTOR_WIDTH_FLOAT, preferred_vector_width_float) DECL_FIELD(PREFERRED_VECTOR_WIDTH_DOUBLE, preferred_vector_width_double) DECL_FIELD(PREFERRED_VECTOR_WIDTH_HALF, preferred_vector_width_half) DECL_FIELD(NATIVE_VECTOR_WIDTH_CHAR, native_vector_width_char) DECL_FIELD(NATIVE_VECTOR_WIDTH_SHORT, native_vector_width_short) DECL_FIELD(NATIVE_VECTOR_WIDTH_INT, native_vector_width_int) DECL_FIELD(NATIVE_VECTOR_WIDTH_LONG, native_vector_width_long) DECL_FIELD(NATIVE_VECTOR_WIDTH_FLOAT, native_vector_width_float) DECL_FIELD(NATIVE_VECTOR_WIDTH_DOUBLE, native_vector_width_double) DECL_FIELD(NATIVE_VECTOR_WIDTH_HALF, native_vector_width_half) DECL_FIELD(MAX_CLOCK_FREQUENCY, max_clock_frequency) DECL_FIELD(ADDRESS_BITS, address_bits) DECL_FIELD(MAX_MEM_ALLOC_SIZE, max_mem_alloc_size) DECL_FIELD(IMAGE_SUPPORT, image_support) DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args) DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args) DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width) DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height) DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width) DECL_FIELD(IMAGE3D_MAX_HEIGHT, image3d_max_height) DECL_FIELD(IMAGE3D_MAX_DEPTH, image3d_max_depth) DECL_FIELD(MAX_SAMPLERS, max_samplers) DECL_FIELD(MAX_PARAMETER_SIZE, max_parameter_size) DECL_FIELD(MEM_BASE_ADDR_ALIGN, mem_base_addr_align) DECL_FIELD(MIN_DATA_TYPE_ALIGN_SIZE, min_data_type_align_size) DECL_FIELD(SINGLE_FP_CONFIG, single_fp_config) DECL_FIELD(GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type) DECL_FIELD(GLOBAL_MEM_CACHELINE_SIZE, global_mem_cache_line_size) DECL_FIELD(GLOBAL_MEM_CACHE_SIZE, global_mem_cache_size) DECL_FIELD(GLOBAL_MEM_SIZE, 
global_mem_size) DECL_FIELD(MAX_CONSTANT_BUFFER_SIZE, max_constant_buffer_size) DECL_FIELD(MAX_CONSTANT_ARGS, max_constant_args) DECL_FIELD(LOCAL_MEM_TYPE, local_mem_type) DECL_FIELD(LOCAL_MEM_SIZE, local_mem_size) DECL_FIELD(ERROR_CORRECTION_SUPPORT, error_correction_support) DECL_FIELD(HOST_UNIFIED_MEMORY, host_unified_memory) DECL_FIELD(PROFILING_TIMER_RESOLUTION, profiling_timer_resolution) DECL_FIELD(ENDIAN_LITTLE, endian_little) DECL_FIELD(AVAILABLE, available) DECL_FIELD(COMPILER_AVAILABLE, compiler_available) DECL_FIELD(EXECUTION_CAPABILITIES, execution_capabilities) DECL_FIELD(QUEUE_PROPERTIES, queue_properties) DECL_FIELD(PLATFORM, platform) DECL_STRING_FIELD(NAME, name) DECL_STRING_FIELD(VENDOR, vendor) DECL_STRING_FIELD(VERSION, version) DECL_STRING_FIELD(PROFILE, profile) DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version) DECL_STRING_FIELD(EXTENSIONS, extensions); DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels) case CL_DRIVER_VERSION: if (param_value_size_ret) { *param_value_size_ret = device->driver_version_sz; if (!param_value) return CL_SUCCESS; } if (param_value_size < device->driver_version_sz) return CL_INVALID_VALUE; memcpy(param_value, device->driver_version, device->driver_version_sz); return CL_SUCCESS; default: return CL_INVALID_VALUE; }; } LOCAL cl_int cl_device_get_version(cl_device_id device, cl_int *ver) { if (UNLIKELY(device != &intel_ivb_gt1_device && device != &intel_ivb_gt2_device && device != &intel_hsw_device)) return CL_INVALID_DEVICE; if (ver == NULL) return CL_SUCCESS; if (device == &intel_ivb_gt1_device || device == &intel_ivb_gt2_device) *ver = 7; else *ver = 75; return CL_SUCCESS; } #undef DECL_FIELD #define DECL_FIELD(CASE,FIELD) \ case JOIN(CL_KERNEL_,CASE): \ if (param_value_size < sizeof(((cl_device_id)NULL)->FIELD)) \ return CL_INVALID_VALUE; \ if (param_value_size_ret != NULL) \ *param_value_size_ret = sizeof(((cl_device_id)NULL)->FIELD);\ memcpy(param_value, \ &device->FIELD, \ 
sizeof(((cl_device_id)NULL)->FIELD)); \ return CL_SUCCESS; LOCAL cl_int cl_get_kernel_workgroup_info(cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) { if (UNLIKELY(device != &intel_ivb_gt1_device && device != &intel_ivb_gt2_device)) return CL_INVALID_DEVICE; if (UNLIKELY(param_value == NULL)) return CL_INVALID_VALUE; switch (param_name) { DECL_FIELD(WORK_GROUP_SIZE, wg_sz) DECL_FIELD(COMPILE_WORK_GROUP_SIZE, compile_wg_sz) default: return CL_INVALID_VALUE; }; } Release_v0.3/src/cl_device_id.h000066400000000000000000000110051223142177000165310ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
*
 * Author: Benjamin Segovia
 */

#ifndef __CL_DEVICE_ID_H__
#define __CL_DEVICE_ID_H__

/* Store complete information about the device.
 *
 * The static device descriptions in cl_device_id.c fill this structure with
 * designated initializers, and cl_get_device_info copies individual members
 * out by name. */
struct _cl_device_id {
  DEFINE_ICD(dispatch)
  cl_device_type device_type;
  cl_uint  vendor_id;               /* set from the PCI device id by cl_get_gt_device */
  cl_uint  max_compute_unit;
  cl_uint  max_thread_per_unit;
  cl_uint  max_work_item_dimensions;
  size_t   max_work_item_sizes[3];
  size_t   max_work_group_size;
  cl_uint  preferred_vector_width_char;
  cl_uint  preferred_vector_width_short;
  cl_uint  preferred_vector_width_int;
  cl_uint  preferred_vector_width_long;
  cl_uint  preferred_vector_width_float;
  cl_uint  preferred_vector_width_double;
  cl_uint  preferred_vector_width_half;
  cl_uint  native_vector_width_char;
  cl_uint  native_vector_width_short;
  cl_uint  native_vector_width_int;
  cl_uint  native_vector_width_long;
  cl_uint  native_vector_width_float;
  cl_uint  native_vector_width_double;
  cl_uint  native_vector_width_half;
  cl_uint  max_clock_frequency;
  cl_uint  address_bits;
  cl_ulong max_mem_alloc_size;
  cl_bool  image_support;
  cl_uint  max_read_image_args;
  cl_uint  max_write_image_args;
  size_t   image2d_max_width;
  size_t   image2d_max_height;
  size_t   image3d_max_width;
  size_t   image3d_max_height;
  size_t   image3d_max_depth;
  cl_uint  max_samplers;
  size_t   max_parameter_size;
  cl_uint  mem_base_addr_align;
  cl_uint  min_data_type_align_size;
  cl_device_fp_config single_fp_config;
  cl_device_mem_cache_type global_mem_cache_type;
  cl_uint  global_mem_cache_line_size;
  cl_ulong global_mem_cache_size;
  cl_ulong global_mem_size;
  cl_ulong max_constant_buffer_size;
  cl_uint  max_constant_args;
  cl_device_local_mem_type local_mem_type;
  cl_ulong local_mem_size;
  cl_bool  error_correction_support;
  cl_bool  host_unified_memory;
  size_t   profiling_timer_resolution;
  cl_bool  endian_little;
  cl_bool  available;
  cl_bool  compiler_available;
  cl_device_exec_capabilities execution_capabilities;
  cl_command_queue_properties queue_properties;
  cl_platform_id platform;
  /* String properties; each one has a companion <name>_sz member below */
  const char *name;
  const char *vendor;
  const char *version;
  const char *profile;
  const char *opencl_c_version;
  const char *extensions;           /* overwritten with the platform's string by cl_get_device_ids */
  const char *driver_version;
  const char *built_in_kernels;
  /* Number of bytes cl_get_device_info reports and copies for the string of
   * the same name */
  size_t name_sz;
  size_t vendor_sz;
  size_t version_sz;
  size_t profile_sz;
  size_t opencl_c_version_sz;
  size_t extensions_sz;
  size_t driver_version_sz;
  size_t built_in_kernels_sz;
  /* Kernel specific info that we're assigning statically */
  size_t wg_sz;
  size_t compile_wg_sz[3];
};

/* Get a device from the given platform */
extern cl_int cl_get_device_ids(cl_platform_id    platform,
                                cl_device_type    device_type,
                                cl_uint           num_entries,
                                cl_device_id *    devices,
                                cl_uint *         num_devices);

/* Get the intel GPU device we currently have in this machine (if any) */
extern cl_device_id cl_get_gt_device(void);

/* Provide info about the device */
extern cl_int cl_get_device_info(cl_device_id     device,
                                 cl_device_info   param_name,
                                 size_t           param_value_size,
                                 void *           param_value,
                                 size_t *         param_value_size_ret);

/* Provide the work-group related info (wg_sz / compile_wg_sz) of the device */
extern cl_int cl_get_kernel_workgroup_info(cl_device_id     device,
                                           cl_kernel_work_group_info   param_name,
                                           size_t           param_value_size,
                                           void *           param_value,
                                           size_t *         param_value_size_ret);
/* Stores the Gen version of the device (7 or 75) in *ver */
extern cl_int cl_device_get_version(cl_device_id device, cl_int *ver);

#endif /* __CL_DEVICE_ID_H__ */

Release_v0.3/src/cl_driver.cpp000066400000000000000000000022621223142177000164510ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
*
 * Author: Benjamin Segovia
 */

extern "C" {
#include "intel/intel_driver.h"
#include "cl_utils.h"
/* NOTE(review): the two directives below carry no header name in this copy
 * of the file; restore them from the upstream source. */
#include
#include
}

namespace
{
  /*! Just use c++ pre-main to initialize the call-backs */
  struct OCLDriverCallBackInitializer
  {
    /* Runs intel_setup_callbacks() when the static instance below is built */
    OCLDriverCallBackInitializer(void) {
      intel_setup_callbacks();
    }
  };

  /*! Set the call backs at pre-main time */
  static OCLDriverCallBackInitializer cbInitializer;
} /* namespace */

Release_v0.3/src/cl_driver.h000066400000000000000000000316041223142177000161200ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

#ifndef __CL_DRIVER_H__
#define __CL_DRIVER_H__

/* NOTE(review): the two directives below carry no header name in this copy
 * of the file; the declarations that follow use uint32_t and size_t. */
#include
#include
#include "cl_driver_type.h"
/* Various limitations we should remove actually */
#define GEN_MAX_SURFACES 128
#define GEN_MAX_SAMPLERS 16

/**************************************************************************
 * cl_driver:
 * Hide behind some call backs the buffer allocation / deallocation ... This
 * will allow us to make the use of a software performance simulator easier and
 * to minimize the code specific for the HW and for the simulator
 *
 * Every entry point is declared as a function type (<name>_cb) plus an
 * extern pointer to that type (<name>).
 **************************************************************************/
/* Create a new driver */
typedef cl_driver (cl_driver_new_cb)(cl_context_prop);
extern cl_driver_new_cb *cl_driver_new;

/* Delete the driver */
typedef void (cl_driver_delete_cb)(cl_driver);
extern cl_driver_delete_cb *cl_driver_delete;

/* Get the buffer manager from the driver */
typedef cl_buffer_mgr (cl_driver_get_bufmgr_cb)(cl_driver);
extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr;

/* Get the Gen version from the driver */
typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
extern cl_driver_get_ver_cb *cl_driver_get_ver;

/**************************************************************************
 * GPGPU command streamer
 **************************************************************************/
/* Describe texture tiling */
typedef enum cl_gpgpu_tiling {
  GPGPU_NO_TILE = 0,
  GPGPU_TILE_X  = 1,
  GPGPU_TILE_Y  = 2,
} cl_gpgpu_tiling;

/* Cache control options */
typedef enum cl_cache_control {
  cc_gtt      = 0x0,
  cc_l3       = 0x1,
  cc_llc      = 0x2,
  cc_llc_l3   = 0x3
} cl_cache_control;

/* Execution state of a command; the values decrease from queued (3) down to
 * complete (0) */
typedef enum gpu_command_status {
  command_queued    = 3,
  command_submitted = 2,
  command_running   = 1,
  command_complete  = 0
} gpu_command_status;

/* Use this structure to bind kernels in the gpgpu state */
typedef struct cl_gpgpu_kernel {
  const char *name;        /* kernel name and bo name */
  uint32_t grf_blocks;     /* register blocks kernel wants (in 8 reg blocks) */
  uint32_t curbe_sz;       /* total size of all curbes */
  cl_buffer bo;            /* kernel code in the proper addr space */
  int32_t barrierID;       /* barrierID for _this_ kernel */
  uint32_t use_slm:1;      /* For gen7 (automatic barrier management) */
  uint32_t thread_n:15;    /* For gen7 (automatic barrier management) */
  uint32_t slm_sz;         /* For gen7 (automatic SLM allocation) */
} cl_gpgpu_kernel;

/* Create a new gpgpu state */
typedef cl_gpgpu (cl_gpgpu_new_cb)(cl_driver);
extern cl_gpgpu_new_cb *cl_gpgpu_new;

/* Delete the gpgpu state */
typedef void (cl_gpgpu_delete_cb)(cl_gpgpu);
extern cl_gpgpu_delete_cb *cl_gpgpu_delete;

/* Synchronize GPU with CPU */
typedef cl_gpgpu (cl_gpgpu_sync_cb)(cl_gpgpu);
extern cl_gpgpu_sync_cb *cl_gpgpu_sync;

/* Bind a regular unformatted buffer */
typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint);
extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;

/* bind samplers defined in both kernel and kernel args. */
typedef void (cl_gpgpu_bind_sampler_cb)(cl_gpgpu, uint32_t *samplers, size_t sampler_sz);
extern cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler;

/* Bind an image (texture) surface.
 * NOTE(review): the original comment said "Set a 2d texture", but the
 * signature also carries type and depth parameters. */
typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
                                      uint32_t id,
                                      cl_buffer obj_bo,
                                      uint32_t obj_bo_offset,
                                      uint32_t format,
                                      uint32_t type,
                                      int32_t w,
                                      int32_t h,
                                      int32_t depth,
                                      int pitch,
                                      cl_gpgpu_tiling tiling);

extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image;

/* Setup a stack */
typedef void (cl_gpgpu_set_stack_cb)(cl_gpgpu, uint32_t offset, uint32_t size, uint32_t cchint);
extern cl_gpgpu_set_stack_cb *cl_gpgpu_set_stack;

/* Setup scratch */
typedef void (cl_gpgpu_set_scratch_cb)(cl_gpgpu, uint32_t per_thread_size);
extern cl_gpgpu_set_scratch_cb *cl_gpgpu_set_scratch;

/* Configure internal state */
typedef void (cl_gpgpu_state_init_cb)(cl_gpgpu, uint32_t max_threads, uint32_t size_cs_entry, int profiling);
extern cl_gpgpu_state_init_cb *cl_gpgpu_state_init;

/* Set the buffer object where to report performance counters */
typedef void (cl_gpgpu_set_perf_counters_cb)(cl_gpgpu, cl_buffer perf);
extern cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters;

/* Fills current curbe buffer with data */
typedef void (cl_gpgpu_upload_curbes_cb)(cl_gpgpu, const void* data, uint32_t size);
extern cl_gpgpu_upload_curbes_cb *cl_gpgpu_upload_curbes;

/* Allocate a constant buffer of the given size and return it */
typedef cl_buffer (cl_gpgpu_alloc_constant_buffer_cb)(cl_gpgpu, uint32_t size);
extern cl_gpgpu_alloc_constant_buffer_cb *cl_gpgpu_alloc_constant_buffer;

/* Setup all indirect states */
typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel *kernel);
extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup;

/* Upload the constant samplers as specified inside the OCL kernel.
 * NOTE(review): this is the only callback taking "cl_gpgpu *" (a pointer to
 * the handle) rather than "cl_gpgpu" — confirm this is intended. */
typedef void (cl_gpgpu_upload_samplers_cb)(cl_gpgpu *state, const void *data, uint32_t n);
extern cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers;

/* Set a sampler */
typedef void (cl_gpgpu_set_sampler_cb)(cl_gpgpu, uint32_t index, uint32_t non_normalized);
extern cl_gpgpu_set_sampler_cb *cl_gpgpu_set_sampler;

/* Allocate the batch buffer (of size sz).
 * NOTE(review): the original comment also said "and return the BO used for
 * the batch buffer", but the callback returns void. */
typedef void (cl_gpgpu_batch_reset_cb)(cl_gpgpu, size_t sz);
extern cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset;

/* Atomic begin, pipeline select, urb, pipeline state and constant buffer */
typedef void (cl_gpgpu_batch_start_cb)(cl_gpgpu);
extern cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start;

/* atomic end with possibly inserted flush */
typedef void (cl_gpgpu_batch_end_cb)(cl_gpgpu, int32_t flush_mode);
extern cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end;

/* Flush the command buffer */
typedef void (cl_gpgpu_flush_cb)(cl_gpgpu);
extern cl_gpgpu_flush_cb *cl_gpgpu_flush;

/* Create a new event for a batch buffer */
typedef cl_gpgpu_event (cl_gpgpu_event_new_cb)(cl_gpgpu);
extern cl_gpgpu_event_new_cb *cl_gpgpu_event_new;

/* Update the status of an event.
 * NOTE(review): the original comment was a copy of the one on
 * cl_gpgpu_event_new; the meaning of the int argument and of the return
 * value must be confirmed against the driver implementation. */
typedef int (cl_gpgpu_event_update_status_cb)(cl_gpgpu_event, int);
extern cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status;

/* Mark an event as pending on a gpgpu state — presumably until
 * cl_gpgpu_event_resume is called; TODO confirm (original comment was a
 * copy of the one on cl_gpgpu_event_new). */
typedef void (cl_gpgpu_event_pending_cb)(cl_gpgpu, cl_gpgpu_event);
extern cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending;

/* Resume an event — presumably one previously made pending; TODO confirm
 * (original comment was a copy of the one on cl_gpgpu_event_new). */
typedef void (cl_gpgpu_event_resume_cb)(cl_gpgpu_event);
extern cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume;

/* Delete an event */
typedef void (cl_gpgpu_event_delete_cb)(cl_gpgpu_event);
extern cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete;

/* Get a event time stamp */
typedef void (cl_gpgpu_event_get_timestamp_cb)(cl_gpgpu_event, int, uint64_t*);
extern cl_gpgpu_event_get_timestamp_cb *cl_gpgpu_event_get_timestamp;

/* Will spawn all threads */
typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
                                  uint32_t simd_sz,
                                  uint32_t thread_n,
                                  const size_t global_wk_off[3],
                                  const size_t global_wk_sz[3],
                                  const size_t local_wk_sz[3]);
extern cl_gpgpu_walker_cb *cl_gpgpu_walker;

/**************************************************************************
 * Buffer
 **************************************************************************/
/* Allocate a buffer */
typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, size_t, size_t);
extern cl_buffer_alloc_cb *cl_buffer_alloc;

/* Set a buffer's tiling mode */
typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride);
extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling;

#include "cl_context.h"
#include "cl_mem.h"
typedef struct _cl_context *cl_context;

/* Allocate a buffer from a texture, filling in the given image description.
 * NOTE(review): the three unnamed integer arguments are not described here;
 * see the driver implementation. */
typedef cl_buffer (cl_buffer_alloc_from_texture_cb)(cl_context, unsigned int, int, unsigned int,
                                                    struct _cl_mem_image *gl_image);
extern cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture;

/* Counterpart of cl_buffer_alloc_from_texture */
typedef void (cl_buffer_release_from_texture_cb)(cl_context, unsigned int, int, unsigned int);
extern cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture;

/* Get a buffer from libva by buffer-object name; sz is an out-parameter
 * (presumably the buffer size — TODO confirm) */
typedef cl_buffer (cl_buffer_get_buffer_from_libva_cb)(cl_context ctx, unsigned int bo_name, size_t *sz);
extern cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva;

/* Get an image buffer from libva by buffer-object name */
typedef cl_buffer (cl_buffer_get_image_from_libva_cb)(cl_context ctx, unsigned int bo_name, struct _cl_mem_image *image);
extern cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva;

/* Unref a buffer and destroy it if no more ref */
typedef int (cl_buffer_unreference_cb)(cl_buffer);
extern cl_buffer_unreference_cb *cl_buffer_unreference;

/* Add one more ref on a buffer */
typedef void (cl_buffer_reference_cb)(cl_buffer);
extern cl_buffer_reference_cb *cl_buffer_reference;

/* Map a buffer */
typedef int (cl_buffer_map_cb)(cl_buffer, uint32_t write_enable);
extern cl_buffer_map_cb *cl_buffer_map;

/* Unmap a buffer */
typedef int (cl_buffer_unmap_cb)(cl_buffer);
extern cl_buffer_unmap_cb *cl_buffer_unmap;

/* Map a buffer in the GTT domain */
typedef int (cl_buffer_map_gtt_cb)(cl_buffer);
extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt;

/* Map a buffer in the GTT domain, without waiting for GPU reads or writes */
typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer);
extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync;

/* Unmap a buffer in the GTT domain */
typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer);
extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt;

/* Get the virtual address (when mapped) */
typedef void* (cl_buffer_get_virtual_cb)(cl_buffer);
extern cl_buffer_get_virtual_cb *cl_buffer_get_virtual;

/* Get the size of the buffer */
typedef size_t (cl_buffer_get_size_cb)(cl_buffer);
extern cl_buffer_get_size_cb *cl_buffer_get_size;

/* Pin a buffer */
typedef int (cl_buffer_pin_cb)(cl_buffer, uint32_t alignment);
extern cl_buffer_pin_cb *cl_buffer_pin;

/* Unpin a buffer */
typedef int (cl_buffer_unpin_cb)(cl_buffer);
extern cl_buffer_unpin_cb *cl_buffer_unpin;

/* Fill data in the buffer */
typedef int (cl_buffer_subdata_cb)(cl_buffer, unsigned long, unsigned long, const void*);
extern cl_buffer_subdata_cb *cl_buffer_subdata;

/* Wait for all pending rendering for this buffer to complete */
typedef int (cl_buffer_wait_rendering_cb) (cl_buffer);
extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;

/* Get the device id */
typedef int (cl_driver_get_device_id_cb)(void);
extern cl_driver_get_device_id_cb *cl_driver_get_device_id;

/**************************************************************************
 * cl_khr_gl_sharing.
 *
 * Acquire / release callbacks for GL textures, buffer objects and render
 * buffers. driver and ctx are passed as untyped pointers.
 **************************************************************************/
typedef int (cl_gl_acquire_texture_cb)(void *driver, void *ctx, int target,
                                       int level, int texture, void*user_data);
extern cl_gl_acquire_texture_cb *cl_gl_acquire_texture;

typedef int (cl_gl_release_texture_cb)(void *driver, void *ctx, int target,
                                       int level, int texture);
extern cl_gl_release_texture_cb *cl_gl_release_texture;

typedef int (cl_gl_acquire_buffer_object_cb)(void *driver, void *ctx,
                                             int bufobj, void* user_data);
extern cl_gl_acquire_buffer_object_cb *cl_gl_acquire_buffer_object;

typedef int (cl_gl_release_buffer_object_cb)(void *driver, void *ctx, int bufobj);
extern cl_gl_release_buffer_object_cb *cl_gl_release_buffer_object;

typedef int (cl_gl_acquire_render_buffer_cb)(void *driver, void *ctx,
                                             int rb, void* user_data);
extern cl_gl_acquire_render_buffer_cb *cl_gl_acquire_render_buffer;

typedef int (cl_gl_release_render_buffer_cb)(void *driver, void *ctx, int rb);
extern cl_gl_release_render_buffer_cb *cl_gl_release_render_buffer;

#ifndef DEFAULT_DRIVER_DIR
/* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */
#define DEFAULT_DRIVER_DIR "/usr/local/lib/dri"
#endif

#endif /* __CL_DRIVER_H__ */

Release_v0.3/src/cl_driver_defs.c000066400000000000000000000102561223142177000171140ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.
If not, see . * * Author: Benjamin Segovia */ /* Storage for the driver call-back pointers: every pointer below is defined in this file and initialised to NULL. NOTE(review): presumably a backend assigns the real entry points during its own set-up -- confirm against the code that installs them. */ #include "cl_driver.h" #include "cl_utils.h" #include /* Driver */ LOCAL cl_driver_new_cb *cl_driver_new = NULL; LOCAL cl_driver_delete_cb *cl_driver_delete = NULL; LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL; LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL; LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; /* Buffer */ LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL; LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL; LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL; LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL; LOCAL cl_buffer_reference_cb *cl_buffer_reference = NULL; LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL; LOCAL cl_buffer_map_cb *cl_buffer_map = NULL; LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL; LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL; LOCAL cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL; LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL; LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL; LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL; LOCAL cl_buffer_pin_cb *cl_buffer_pin = NULL; LOCAL cl_buffer_unpin_cb *cl_buffer_unpin = NULL; LOCAL cl_buffer_subdata_cb *cl_buffer_subdata = NULL; LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL; LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = NULL; LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL; /* cl_khr_gl_sharing */ LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL; LOCAL cl_gl_release_texture_cb *cl_gl_release_texture = NULL; LOCAL cl_gl_acquire_buffer_object_cb *cl_gl_acquire_buffer_object = NULL; LOCAL cl_gl_release_buffer_object_cb *cl_gl_release_buffer_object = NULL; LOCAL cl_gl_acquire_render_buffer_cb *cl_gl_acquire_render_buffer = NULL; LOCAL cl_gl_release_render_buffer_cb *cl_gl_release_render_buffer 
= NULL; /* GPGPU */ LOCAL cl_gpgpu_new_cb *cl_gpgpu_new = NULL; LOCAL cl_gpgpu_delete_cb *cl_gpgpu_delete = NULL; LOCAL cl_gpgpu_sync_cb *cl_gpgpu_sync = NULL; LOCAL cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf = NULL; LOCAL cl_gpgpu_set_stack_cb *cl_gpgpu_set_stack = NULL; LOCAL cl_gpgpu_set_scratch_cb *cl_gpgpu_set_scratch = NULL; LOCAL cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image = NULL; LOCAL cl_gpgpu_state_init_cb *cl_gpgpu_state_init = NULL; LOCAL cl_gpgpu_alloc_constant_buffer_cb * cl_gpgpu_alloc_constant_buffer = NULL; LOCAL cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters = NULL; LOCAL cl_gpgpu_upload_curbes_cb *cl_gpgpu_upload_curbes = NULL; LOCAL cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup = NULL; LOCAL cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers = NULL; LOCAL cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset = NULL; LOCAL cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start = NULL; LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL; LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL; LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL; LOCAL cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler = NULL; LOCAL cl_gpgpu_event_new_cb *cl_gpgpu_event_new = NULL; LOCAL cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status = NULL; LOCAL cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending = NULL; LOCAL cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL; LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL; LOCAL cl_gpgpu_event_get_timestamp_cb *cl_gpgpu_event_get_timestamp = NULL; Release_v0.3/src/cl_driver_type.h000066400000000000000000000016501223142177000171570ustar00rootroot00000000000000/************************************************************************** * cl_driver: * Hide behind some call backs the buffer allocation / deallocation ... 
This * will allow us to make the use of a software performance simulator easier and * to minimize the code specific for the HW and for the simulator **************************************************************************/ /* Encapsulates command buffer / data buffer / kernels */ typedef struct _cl_buffer *cl_buffer; /* Encapsulates buffer manager */ typedef struct _cl_buffer_mgr *cl_buffer_mgr; /* Encapsulates the driver backend functionalities */ typedef struct _cl_driver *cl_driver; /* Encapsulates the gpgpu stream of commands */ typedef struct _cl_gpgpu *cl_gpgpu; /* Encapsulates the event of a command stream */ typedef struct _cl_gpgpu_event *cl_gpgpu_event; /* Opaque pointer to struct _cl_context_prop; the struct itself is not defined in this header */ typedef struct _cl_context_prop *cl_context_prop; /* Opaque pointer to struct _cl_sampler; the struct itself is not defined in this header */ typedef struct _cl_sampler *cl_sampler; Release_v0.3/src/cl_enqueue.c000066400000000000000000000250531223142177000162700ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Rong Yang */ #include "cl_enqueue.h" #include "cl_image.h" #include "cl_driver.h" #include "cl_utils.h" #include #include #include #include cl_int cl_enqueue_read_buffer(enqueue_data* data) { cl_int err = CL_SUCCESS; void* src_ptr; if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } memcpy(data->ptr, (char*)src_ptr + data->offset, data->size); err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_read_buffer_rect(enqueue_data* data) { cl_int err = CL_SUCCESS; void* src_ptr; void* dst_ptr; const size_t* origin = data->origin; const size_t* host_origin = data->host_origin; const size_t* region = data->region; if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2]; src_ptr = (char*)src_ptr + offset; offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; dst_ptr = (char *)data->ptr + offset; if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? 
data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = dst_ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, region[0]); src += data->row_pitch; dst += data->host_row_pitch; } src_ptr = (char*)src_ptr + data->slice_pitch; dst_ptr = (char*)dst_ptr + data->host_slice_pitch; } } err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_write_buffer(enqueue_data *data) { cl_int err = CL_SUCCESS; void* dst_ptr; if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } memcpy((char*)dst_ptr + data->offset, data->const_ptr, data->size); err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_write_buffer_rect(enqueue_data *data) { cl_int err = CL_SUCCESS; void* src_ptr; void* dst_ptr; const size_t* origin = data->origin; const size_t* host_origin = data->host_origin; const size_t* region = data->region; if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2]; dst_ptr = (char *)dst_ptr + offset; offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; src_ptr = (char*)data->const_ptr + offset; if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? 
data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = dst_ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, region[0]); src += data->host_row_pitch; dst += data->row_pitch; } src_ptr = (char*)src_ptr + data->host_slice_pitch; dst_ptr = (char*)dst_ptr + data->slice_pitch; } } err = cl_mem_unmap_auto(data->mem_obj); error: return err; } cl_int cl_enqueue_read_image(enqueue_data *data) { cl_int err = CL_SUCCESS; void* src_ptr; cl_mem mem = data->mem_obj; CHECK_IMAGE(mem, image); const size_t* origin = data->origin; const size_t* region = data->region; if (!(src_ptr = cl_mem_map_auto(mem))) { err = CL_MAP_FAILURE; goto error; } size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; src_ptr = (char*)src_ptr + offset; if (!origin[0] && region[0] == image->w && data->row_pitch == image->row_pitch && (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch))) { memcpy(data->ptr, src_ptr, region[2] == 1 ? 
data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = data->ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, image->bpp*region[0]); src += image->row_pitch; dst += data->row_pitch; } src_ptr = (char*)src_ptr + image->slice_pitch; data->ptr = (char*)data->ptr + data->slice_pitch; } } err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_write_image(enqueue_data *data) { cl_int err = CL_SUCCESS; void* dst_ptr; cl_mem mem = data->mem_obj; CHECK_IMAGE(mem, image); if (!(dst_ptr = cl_mem_map_auto(mem))) { err = CL_MAP_FAILURE; goto error; } cl_mem_copy_image_region(data->origin, data->region, dst_ptr, image->row_pitch, image->slice_pitch, data->const_ptr, data->row_pitch, data->slice_pitch, image); err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_map_buffer(enqueue_data *data) { void *ptr = NULL; cl_int err = CL_SUCCESS; cl_mem buffer = data->mem_obj; //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here if (!(ptr = cl_mem_map_gtt(buffer))) { err = CL_MAP_FAILURE; goto error; } ptr = (char*)ptr + data->offset; assert(data->ptr == ptr); if(buffer->flags & CL_MEM_USE_HOST_PTR) { assert(buffer->host_ptr); memcpy(buffer->host_ptr + data->offset, ptr, data->size); } error: return err; } cl_int cl_enqueue_map_image(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; void *ptr = NULL; CHECK_IMAGE(mem, image); if (!(ptr = cl_mem_map_gtt(mem))) { err = CL_MAP_FAILURE; goto error; } assert(data->ptr == (char*)ptr + data->offset); if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); cl_mem_copy_image_region(data->origin, data->region, mem->host_ptr, image->host_row_pitch, image->host_slice_pitch, data->ptr, data->row_pitch, data->slice_pitch, image); } error: return err; } cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) { cl_int err = CL_SUCCESS; int i; size_t mapped_size = 0; void 
* v_ptr = NULL; void * mapped_ptr = data->ptr; cl_mem memobj = data->mem_obj; assert(memobj->mapped_ptr_sz >= memobj->map_ref); INVALID_VALUE_IF(!mapped_ptr); for (i = 0; i < memobj->mapped_ptr_sz; i++) { if (memobj->mapped_ptr[i].ptr == mapped_ptr) { memobj->mapped_ptr[i].ptr = NULL; mapped_size = memobj->mapped_ptr[i].size; v_ptr = memobj->mapped_ptr[i].v_ptr; memobj->mapped_ptr[i].size = 0; memobj->mapped_ptr[i].v_ptr = NULL; memobj->map_ref--; break; } } /* can not find a mapped address? */ INVALID_VALUE_IF(i == memobj->mapped_ptr_sz); if (memobj->flags & CL_MEM_USE_HOST_PTR) { assert(mapped_ptr >= memobj->host_ptr && mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size); /* Sync the data. */ memcpy(v_ptr, mapped_ptr, mapped_size); } else { assert(v_ptr == mapped_ptr); } cl_mem_unmap_gtt(memobj); /* shrink the mapped slot. */ if (memobj->mapped_ptr_sz/2 > memobj->map_ref) { int j = 0; cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2)); if (!new_ptr) { /* Just do nothing. 
*/ goto error; } memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr)); for (i = 0; i < memobj->mapped_ptr_sz; i++) { if (memobj->mapped_ptr[i].ptr) { new_ptr[j] = memobj->mapped_ptr[i]; j++; assert(j < memobj->mapped_ptr_sz/2); } } memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2; free(memobj->mapped_ptr); memobj->mapped_ptr = new_ptr; } error: return err; } cl_int cl_enqueue_native_kernel(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_uint num_mem_objects = (cl_uint)data->offset; const cl_mem *mem_list = data->mem_list; const void **args_mem_loc = (const void **)data->const_ptr; cl_uint i; for (i=0; iuser_func(data->ptr); for (i=0; iptr); error: return err; } cl_int cl_enqueue_handle(enqueue_data* data) { switch(data->type) { case EnqueueReadBuffer: return cl_enqueue_read_buffer(data); case EnqueueReadBufferRect: return cl_enqueue_read_buffer_rect(data); case EnqueueWriteBuffer: return cl_enqueue_write_buffer(data); case EnqueueWriteBufferRect: return cl_enqueue_write_buffer_rect(data); case EnqueueReadImage: return cl_enqueue_read_image(data); case EnqueueWriteImage: return cl_enqueue_write_image(data); case EnqueueMapBuffer: return cl_enqueue_map_buffer(data); case EnqueueMapImage: return cl_enqueue_map_image(data); case EnqueueUnmapMemObject: return cl_enqueue_unmap_mem_object(data); case EnqueueCopyBufferRect: case EnqueueCopyImage: case EnqueueCopyBufferToImage: case EnqueueCopyImageToBuffer: case EnqueueNDRangeKernel: cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); return CL_SUCCESS; case EnqueueNativeKernel: return cl_enqueue_native_kernel(data); default: return CL_SUCCESS; } } Release_v0.3/src/cl_enqueue.h000066400000000000000000000047301223142177000162740ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the 
License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Rong Yang */ #ifndef __CL_ENQUEUE_H__ #define __CL_ENQUEUE_H__ #include "cl_internals.h" #include "cl_driver.h" #include "CL/cl.h" /* Kinds of command that cl_enqueue_handle() dispatches on */ typedef enum { EnqueueReadBuffer = 0, EnqueueReadBufferRect, EnqueueWriteBuffer, EnqueueWriteBufferRect, EnqueueCopyBuffer, EnqueueCopyBufferRect, EnqueueReadImage, EnqueueWriteImage, EnqueueCopyImage, EnqueueCopyImageToBuffer, EnqueueCopyBufferToImage, EnqueueMapBuffer, EnqueueMapImage, EnqueueUnmapMemObject, EnqueueNDRangeKernel, EnqueueNativeKernel, EnqueueMarker, EnqueueInvalid } enqueue_type; /* Everything needed to execute one command later; which fields are meaningful depends on type */ typedef struct _enqueue_data { enqueue_type type; /* Command type */ cl_mem mem_obj; /* Enqueue's cl_mem */ cl_command_queue queue; /* Command queue */ size_t offset; /* Mem object's offset; for a native kernel, the number of mem objects */ size_t size; /* Size */ size_t origin[3]; /* Origin */ size_t host_origin[3]; /* Host origin, used in read/write buffer rect */ size_t region[3]; /* Region */ size_t row_pitch; /* Row pitch */ size_t slice_pitch; /* Slice pitch */ size_t host_row_pitch; /* Host row pitch, used in read/write buffer rect */ size_t host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */ const void * const_ptr; /* Host source for writes; for a native kernel, the argument locations */ void * ptr; /* Host destination for reads / mapped address; for GPU commands, the cl_gpgpu_event to resume */ const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */ void (*user_func)(void *); /* pointer to a host-callable user function */ } enqueue_data; /* Execute the command described by data, dispatching on data->type */ cl_int cl_enqueue_handle(enqueue_data* data); #endif /* __CL_ENQUEUE_H__ */ Release_v0.3/src/cl_event.c000066400000000000000000000360041223142177000157400ustar00rootroot00000000000000/* * Copyright © 2012 
Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Rong Yang */ #include "cl_event.h" #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include "cl_command_queue.h" #include #include inline cl_bool cl_event_is_gpu_command_type(cl_command_type type) { switch(type) { case CL_COMMAND_COPY_BUFFER: case CL_COMMAND_COPY_IMAGE: case CL_COMMAND_COPY_IMAGE_TO_BUFFER: case CL_COMMAND_COPY_BUFFER_TO_IMAGE: case CL_COMMAND_COPY_BUFFER_RECT: case CL_COMMAND_TASK: case CL_COMMAND_NDRANGE_KERNEL: return CL_TRUE; default: return CL_FALSE; } } cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict) { cl_event event = NULL; /* Allocate and inialize the structure itself */ TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event)); SET_ICD(event->dispatch) event->magic = CL_MAGIC_EVENT_HEADER; event->ref_n = 1; /* Append the event in the context event list */ pthread_mutex_lock(&ctx->event_lock); event->next = ctx->events; if (ctx->events != NULL) ctx->events->prev = event; ctx->events = event; pthread_mutex_unlock(&ctx->event_lock); event->ctx = ctx; cl_context_add_ref(ctx); /* Initialize all members and create GPGPU event object */ event->queue = queue; event->type = type; event->gpgpu_event = NULL; if(type == CL_COMMAND_USER) { event->status = CL_SUBMITTED; } else { event->status = 
CL_QUEUED; if(cl_event_is_gpu_command_type(event->type)) event->gpgpu_event = cl_gpgpu_event_new(queue->gpgpu); } cl_event_add_ref(event); //dec when complete event->user_cb = NULL; event->enqueue_cb = NULL; event->waits_head = NULL; event->emplict = emplict; if(queue && event->gpgpu_event) queue->last_event = event; exit: return event; error: cl_event_delete(event); event = NULL; goto exit; } void cl_event_delete(cl_event event) { if (UNLIKELY(event == NULL)) return; cl_event_update_status(event); if (atomic_dec(&event->ref_n) > 1) return; if(event->queue && event->queue->last_event == event) event->queue->last_event = NULL; /* Call all user's callback if haven't execute */ user_callback *cb = event->user_cb; while(event->user_cb) { cb = event->user_cb; if(cb->executed == CL_FALSE) { cb->pfn_notify(event, event->status, cb->user_data); } event->user_cb = cb->next; cl_free(cb); } /* delete gpgpu event object */ if(event->gpgpu_event) cl_gpgpu_event_delete(event->gpgpu_event); /* Remove it from the list */ assert(event->ctx); pthread_mutex_lock(&event->ctx->event_lock); if (event->prev) event->prev->next = event->next; if (event->next) event->next->prev = event->prev; if (event->prev == NULL && event->next == NULL) event->ctx->events = NULL; pthread_mutex_unlock(&event->ctx->event_lock); cl_context_delete(event->ctx); cl_free(event); } void cl_event_add_ref(cl_event event) { assert(event); atomic_inc(&event->ref_n); } cl_int cl_event_set_callback(cl_event event , cl_int command_exec_callback_type, EVENT_NOTIFY pfn_notify, void* user_data) { assert(event); assert(pfn_notify); cl_int err = CL_SUCCESS; user_callback *cb; TRY_ALLOC(cb, CALLOC(user_callback)); cb->pfn_notify = pfn_notify; cb->user_data = user_data; cb->status = command_exec_callback_type; cb->executed = CL_FALSE; cb->next = event->user_cb; event->user_cb = cb; exit: return err; error: err = CL_OUT_OF_HOST_MEMORY; cl_free(cb); goto exit; }; cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list, 
const cl_event *event_wait_list, cl_event *event,cl_context ctx) { cl_int err = CL_SUCCESS; cl_int i; /* check the event_wait_list and num_events_in_wait_list */ if((event_wait_list == NULL) && (num_events_in_wait_list > 0)) goto error; if ((event_wait_list != NULL) && (num_events_in_wait_list == 0)){ goto error; } /* check the event and context */ for(i=0; istatus < CL_COMPLETE) { err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; goto exit; } if(event && *event == event_wait_list[i]) goto error; if(event_wait_list[i]->ctx != ctx) goto error; } exit: return err; error: err = CL_INVALID_EVENT_WAIT_LIST; //reset error goto exit; } cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_command_queue queue) { cl_int i, j; /* Check whether wait user events */ for(i=0; istatus <= CL_COMPLETE) continue; /* Need wait on user event, return and do enqueue defer */ if((event_wait_list[i]->type == CL_COMMAND_USER) || (event_wait_list[i]->enqueue_cb && (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){ for(j=0; jbarrier_index > 0) { return CL_ENQUEUE_EXECUTE_DEFER; } /* Non user events or all user event finished, wait all enqueue events finish */ for(i=0; istatus <= CL_COMPLETE) continue; //enqueue callback haven't finish, in another thread, wait if(event_wait_list[i]->enqueue_cb != NULL) return CL_ENQUEUE_EXECUTE_DEFER; if(event_wait_list[i]->gpgpu_event) cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1); cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback } return CL_ENQUEUE_EXECUTE_IMM; } void cl_event_new_enqueue_callback(cl_event event, enqueue_data *data, cl_uint num_events_in_wait_list, const cl_event *event_wait_list) { enqueue_callback *cb, *node; user_event *user_events, *u_ev; cl_command_queue queue = event->queue; cl_int i; /* Allocate and inialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = num_events_in_wait_list; 
TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); for(i=0; iwait_list[i] = event_wait_list[i]; cb->event = event; cb->next = NULL; cb->wait_user_events = NULL; if(queue && queue->barrier_index > 0) { for(i=0; ibarrier_index; i++) { /* Insert the enqueue_callback to user event list */ node = queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; else while((node != cb) && node->next) node = node->next; if(node == cb) //wait on dup user event continue; node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = queue->wait_events[i]; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; } } /* Find out all user events that events in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) continue; if(event_wait_list[i]->type == CL_COMMAND_USER) { /* Insert the enqueue_callback to user event list */ node = event_wait_list[i]->waits_head; if(node == NULL) event_wait_list[i]->waits_head = cb; else { while((node != cb) && node->next) node = node->next; if(node == cb) //wait on dup user event continue; node->next = cb; } /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = event_wait_list[i]; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; cl_command_queue_insert_event(event->queue, event_wait_list[i]); } else if(event_wait_list[i]->enqueue_cb != NULL) { user_events = event_wait_list[i]->enqueue_cb->wait_user_events; while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = user_events->event->waits_head; while((node != cb) && node->next) node = node->next; if(node == cb) { //wait on dup user event user_events = user_events->next; continue; } node->next = cb; /* Insert the user event to enqueue_callback's wait_user_events */ TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = 
user_events->event; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; user_events = user_events->next; cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } if(data->queue != NULL && event->gpgpu_event != NULL) { cl_gpgpu_event_pending(data->queue->gpgpu, event->gpgpu_event); data->ptr = (void *)event->gpgpu_event; } cb->data = *data; event->enqueue_cb = cb; exit: return; error: if(cb) { while(cb->wait_user_events) { u_ev = cb->wait_user_events; cb->wait_user_events = cb->wait_user_events->next; cl_free(u_ev); } if(cb->wait_list) cl_free(cb->wait_list); cl_free(cb); } goto exit; } void cl_event_set_status(cl_event event, cl_int status) { user_callback *user_cb; user_event *u_ev, *u_ev_next; cl_int ret, i; cl_event evt; pthread_mutex_lock(&event->ctx->event_lock); if(status >= event->status) { pthread_mutex_unlock(&event->ctx->event_lock); return; } if(event->status <= CL_COMPLETE) { event->status = status; //have done enqueue before or doing in another thread pthread_mutex_unlock(&event->ctx->event_lock); return; } if(status <= CL_COMPLETE) { if(event->enqueue_cb) { cl_enqueue_handle(&event->enqueue_cb->data); if(event->gpgpu_event) cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set complet, need refine event->status = status; //Change the event status after enqueue and befor unlock pthread_mutex_unlock(&event->ctx->event_lock); for(i=0; ienqueue_cb->num_events; i++) cl_event_delete(event->enqueue_cb->wait_list[i]); pthread_mutex_lock(&event->ctx->event_lock); if(event->enqueue_cb->wait_list) cl_free(event->enqueue_cb->wait_list); cl_free(event->enqueue_cb); event->enqueue_cb = NULL; } } if(event->status >= status) //maybe changed in other threads event->status = status; pthread_mutex_unlock(&event->ctx->event_lock); if(event->status <= CL_COMPLETE) cl_event_delete(event); /* Call user callback */ user_cb = event->user_cb; while(user_cb) { if(user_cb->status >= status) { user_cb->pfn_notify(event, event->status, 
user_cb->user_data); user_cb->executed = CL_TRUE; } user_cb = user_cb->next; } if(event->type != CL_COMMAND_USER) return; /* Check all defer enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { /* Remove this user event in enqueue_cb */ while(enqueue_cb->wait_user_events && enqueue_cb->wait_user_events->event == event) { u_ev = enqueue_cb->wait_user_events; enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next; cl_free(u_ev); } u_ev = enqueue_cb->wait_user_events; while(u_ev) { u_ev_next = u_ev->next; if(u_ev_next && u_ev_next->event == event) { u_ev->next = u_ev_next->next; cl_free(u_ev_next); } else u_ev->next = u_ev_next; } /* Still wait on other user events */ if(enqueue_cb->wait_user_events != NULL) { enqueue_cb = enqueue_cb->next; continue; } //remove user event frome enqueue_cb's ctx cl_command_queue_remove_event(enqueue_cb->event->queue, event); /* All user events complete, now wait enqueue events */ ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list, enqueue_cb->event->queue); assert(ret != CL_ENQUEUE_EXECUTE_DEFER); cb = enqueue_cb; enqueue_cb = enqueue_cb->next; /* Call the pending operation */ evt = cb->event; cl_event_set_status(cb->event, CL_COMPLETE); if(evt->emplict == CL_FALSE) { cl_event_delete(evt); } } event->waits_head = NULL; } void cl_event_update_status(cl_event event) { if(event->status <= CL_COMPLETE) return; if((event->gpgpu_event) && (cl_gpgpu_event_update_status(event->gpgpu_event, 0) == command_complete)) cl_event_set_status(event, CL_COMPLETE); } cl_int cl_event_marker(cl_command_queue queue, cl_event* event) { enqueue_data data; *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); if(event == NULL) return CL_OUT_OF_HOST_MEMORY; //if wait_events_num>0, the marker event need wait queue->wait_events if(queue->wait_events_num > 0) { data.type = EnqueueMarker; cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, queue->wait_events); return 
CL_SUCCESS; } if(queue->last_event && queue->last_event->gpgpu_event) { cl_gpgpu_event_update_status(queue->last_event->gpgpu_event, 1); } cl_event_set_status(*event, CL_COMPLETE); return CL_SUCCESS; } cl_int cl_event_profiling(cl_event event, cl_profiling_info param_name, cl_ulong *ret_val) { if (!event->gpgpu_event) { /* Some event like read buffer do not need GPU involved, so we just return all the profiling to 0 now. */ *ret_val = 0; return CL_SUCCESS; } if(param_name == CL_PROFILING_COMMAND_START || param_name == CL_PROFILING_COMMAND_QUEUED || param_name == CL_PROFILING_COMMAND_SUBMIT) { cl_gpgpu_event_get_timestamp(event->gpgpu_event, 0, ret_val); return CL_SUCCESS; } else if (param_name == CL_PROFILING_COMMAND_END) { cl_gpgpu_event_get_timestamp(event->gpgpu_event, 1, ret_val); return CL_SUCCESS; } else { return CL_INVALID_VALUE; } } Release_v0.3/src/cl_event.h000066400000000000000000000104401223142177000157410ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_EVENT_H__ #define __CL_EVENT_H__ #include #include "cl_internals.h" #include "cl_driver.h" #include "cl_enqueue.h" #include "CL/cl.h" #define CL_ENQUEUE_EXECUTE_IMM 0 #define CL_ENQUEUE_EXECUTE_DEFER 1 typedef struct _user_event { cl_event event; /* The user event */ struct _user_event* next; /* Next user event in list */ } user_event; typedef struct _enqueue_callback { cl_event event; /* The event relative this enqueue callback */ enqueue_data data; /* Hold all enqueue callback's infomation */ cl_uint num_events; /* num events in wait list */ cl_event* wait_list; /* All event wait list this callback wait on */ user_event* wait_user_events; /* The head of user event list the callback wait on */ struct _enqueue_callback* next; /* The next enqueue callback in wait list */ } enqueue_callback; typedef void (CL_CALLBACK *EVENT_NOTIFY)(cl_event event, cl_int event_command_exec_status, void *user_data); typedef struct _user_callback { cl_int status; /* The execution status */ cl_bool executed; /* Indicat the callback function been called or not */ EVENT_NOTIFY pfn_notify; /* Callback function */ void* user_data; /* Callback user data */ struct _user_callback* next; /* Next event callback in list */ } user_callback; struct _cl_event { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a sampler object */ volatile int ref_n; /* We reference count this object */ cl_context ctx; /* The context associated with event */ cl_event prev, next; /* We chain the memory buffers together */ cl_command_queue queue; /* The command queue associated with event */ cl_command_type type; /* The command type associated with event */ cl_int status; /* The execution status */ cl_gpgpu_event gpgpu_event; /* The event object communicate with hardware */ user_callback* user_cb; /* The event callback functions */ enqueue_callback* enqueue_cb; /* This event's enqueue */ enqueue_callback* waits_head; /* The head of enqueues list wait on this event */ 
cl_bool emplict; /* Identify this event whether created by api emplict*/ }; /* Create a new event object */ cl_event cl_event_new(cl_context, cl_command_queue, cl_command_type, cl_bool); /* Unref the object and delete it if no more reference on it */ void cl_event_delete(cl_event); /* Add one more reference to this object */ void cl_event_add_ref(cl_event); /* Rigister a user callback function for specific commond execution status */ cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *); /* Check events wait list for enqueue commonds */ cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context); /* Wait the all events in wait list complete */ cl_int cl_event_wait_events(cl_uint, const cl_event *, cl_command_queue); /* New a enqueue suspend task */ void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *); /* Set the event status and call all callbacks */ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); /* Create the marker event */ cl_int cl_event_marker(cl_command_queue, cl_event*); /* Do the event profiling */ cl_int cl_event_profiling(cl_event event, cl_profiling_info param_name, cl_ulong *ret_val); #endif /* __CL_EVENT_H__ */ Release_v0.3/src/cl_extensions.c000066400000000000000000000052301223142177000170130ustar00rootroot00000000000000#ifdef HAS_EGL #include "EGL/egl.h" #include "EGL/eglext.h" #endif #include "cl_platform_id.h" #include "cl_internals.h" #include "CL/cl.h" #include "cl_utils.h" #include #include static struct cl_extensions intel_extensions = { { #define DECL_EXT(name) \ {(struct cl_extension_base){.ext_id = cl_##name##_ext_id, .ext_name = "cl_" #name, .ext_enabled = 0}}, DECL_ALL_EXTENSIONS }, #undef DECL_EXT {""} }; void check_basic_extension(cl_extensions_t *extensions) { int id; for(id = BASE_EXT_START_ID; id <= BASE_EXT_END_ID; id++) if (id != EXT_ID(khr_fp64)) extensions->extensions[id].base.ext_enabled 
= 1; } void check_opt1_extension(cl_extensions_t *extensions) { int id; for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++) if (id == EXT_ID(khr_icd)) extensions->extensions[id].base.ext_enabled = 1; } void check_gl_extension(cl_extensions_t *extensions) { #if defined(HAS_EGL) int id; /* For now, we only support cl_khr_gl_sharing. */ for(id = GL_EXT_START_ID; id <= GL_EXT_END_ID; id++) if (id == EXT_ID(khr_gl_sharing)) extensions->extensions[id].base.ext_enabled = 1; #endif } void check_intel_extension(cl_extensions_t *extensions) { /* Should put those map/unmap extensions here. */ } void process_extension_str(cl_extensions_t *extensions) { int str_max = sizeof(extensions->ext_str); int str_offset = 0; int id; extensions->ext_str[str_max] = '\0'; for(id = 0; id < cl_khr_extension_id_max; id++) { if (extensions->extensions[id].base.ext_enabled) { int copy_len; char *ext_name = extensions->extensions[id].base.ext_name; if (str_offset + 1 >= str_max) return; if (str_offset != 0) extensions->ext_str[str_offset - 1] = ' '; copy_len = (strlen(ext_name) + 1 + str_offset) < str_max ? 
(strlen(ext_name) + 1) : (str_max - str_offset - 1); strncpy(&extensions->ext_str[str_offset], extensions->extensions[id].base.ext_name, copy_len); str_offset += copy_len; } } } LOCAL void cl_intel_platform_extension_init(cl_platform_id intel_platform) { static int initialized = 0; if (initialized) { intel_platform->internal_extensions = &intel_extensions; intel_platform->extensions = intel_extensions.ext_str; return; } check_basic_extension(&intel_extensions); check_opt1_extension(&intel_extensions); check_gl_extension(&intel_extensions); check_intel_extension(&intel_extensions); process_extension_str(&intel_extensions); intel_platform->internal_extensions = &intel_extensions; intel_platform->extensions = intel_extensions.ext_str; initialized = 1; return; } Release_v0.3/src/cl_extensions.h000066400000000000000000000054461223142177000170310ustar00rootroot00000000000000/* The following approved Khronos extension * names must be returned by all device that * support OpenCL C 1.2. */ #define DECL_BASE_EXTENSIONS \ DECL_EXT(khr_global_int32_base_atomics) \ DECL_EXT(khr_global_int32_extended_atomics) \ DECL_EXT(khr_local_int32_base_atomics) \ DECL_EXT(khr_local_int32_extended_atomics) \ DECL_EXT(khr_byte_addressable_store) \ DECL_EXT(khr_fp64) /* The OPT1 extensions are those optional extensions * which don't have external dependecies*/ #define DECL_OPT1_EXTENSIONS \ DECL_EXT(khr_int64_base_atomics)\ DECL_EXT(khr_int64_extended_atomics)\ DECL_EXT(khr_3d_image_writes)\ DECL_EXT(khr_fp16)\ DECL_EXT(khr_image2d_from_buffer)\ DECL_EXT(khr_initialize_memory)\ DECL_EXT(khr_context_abort)\ DECL_EXT(khr_depth_images)\ DECL_EXT(khr_spir) \ DECL_EXT(khr_icd) #define DECL_GL_EXTENSIONS \ DECL_EXT(khr_gl_sharing)\ DECL_EXT(khr_gl_event)\ DECL_EXT(khr_gl_depth_images)\ DECL_EXT(khr_gl_msaa_sharing) #define DECL_D3D_EXTENSIONS \ DECL_EXT(khr_d3d10_sharing)\ DECL_EXT(khr_dx9_media_sharing)\ DECL_EXT(khr_d3d11_sharing)\ #define DECL_ALL_EXTENSIONS \ DECL_BASE_EXTENSIONS \ 
DECL_OPT1_EXTENSIONS \ DECL_GL_EXTENSIONS \ DECL_D3D_EXTENSIONS #define EXT_ID(name) cl_ ## name ## _ext_id #define EXT_STRUCT_NAME(name) cl_ ## name ## ext /*Declare enum ids */ typedef enum { #define DECL_EXT(name) EXT_ID(name), DECL_ALL_EXTENSIONS #undef DECL_EXT cl_khr_extension_id_max }cl_extension_enum; #define BASE_EXT_START_ID EXT_ID(khr_global_int32_base_atomics) #define BASE_EXT_END_ID EXT_ID(khr_fp64) #define OPT1_EXT_START_ID EXT_ID(khr_int64_base_atomics) #define OPT1_EXT_END_ID EXT_ID(khr_icd) #define GL_EXT_START_ID EXT_ID(khr_gl_sharing) #define GL_EXT_END_ID EXT_ID(khr_gl_msaa_sharing) #define IS_BASE_EXTENSION(id) (id >= BASE_EXT_START_ID && id <= BASE_EXT_END_ID) #define IS_OPT1_EXTENSION(id) (id >= OPT1_EXT_START_ID && id <= OPT1_EXT_END_ID) #define IS_GL_EXTENSION(id) (id >= GL_EXT_START_ID && id <= GL_EXT_END_ID) struct cl_extension_base { cl_extension_enum ext_id; int ext_enabled; char *ext_name; }; /* Declare each extension structure. */ #define DECL_EXT(name) \ struct EXT_STRUCT_NAME(name) { \ struct cl_extension_base base;\ }; DECL_BASE_EXTENSIONS DECL_OPT1_EXTENSIONS DECL_D3D_EXTENSIONS DECL_GL_EXTENSIONS #undef DECL_EXT /* Union all extensions together. 
*/ typedef union { struct cl_extension_base base; #define DECL_EXT(name) struct EXT_STRUCT_NAME(name) EXT_STRUCT_NAME(name); DECL_ALL_EXTENSIONS #undef DECL_EXT } extension_union; typedef struct cl_extensions { extension_union extensions[cl_khr_extension_id_max]; char ext_str[256]; } cl_extensions_t; struct _cl_platform_id; typedef struct _cl_platform_id * cl_platform_id; extern void cl_intel_platform_extension_init(cl_platform_id intel_platform); Release_v0.3/src/cl_gen75_device.h000066400000000000000000000020211223142177000170600ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* Common fields for both SNB devices (either GT1 or GT2) */ .max_parameter_size = 256, .global_mem_cache_line_size = 128, /* XXX */ .global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, #include "cl_gt_device.h" Release_v0.3/src/cl_gen7_device.h000066400000000000000000000020201223142177000167720ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* Common fields for both IVB devices (either GT1 or GT2) */ .max_parameter_size = 256, .global_mem_cache_line_size = 128, /* XXX */ .global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, #include "cl_gt_device.h" Release_v0.3/src/cl_gl_api.c000066400000000000000000000106751223142177000160600ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Zhigang Gong */ #include #include #include #ifdef HAS_EGL #include #endif #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_command_queue.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_mem.h" #include "cl_image.h" #include "cl_sampler.h" #include "cl_alloc.h" #include "cl_utils.h" #include "CL/cl.h" #include "CL/cl_gl.h" #include "CL/cl_intel.h" #include "cl_mem_gl.h" #define CHECK_GL_CONTEXT(CTX) \ do { \ if (UNLIKELY(CTX->props.gl_type == CL_GL_NOSHARE)) { \ err = CL_INVALID_CONTEXT; \ goto error; \ } \ } while (0) cl_mem clCreateFromGLBuffer(cl_context context, cl_mem_flags flags, GLuint bufobj, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_buffer(context, flags, bufobj, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_texture(context, flags, texture_target, miplevel, texture, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateFromGLTexture3D(cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_texture(context, flags, texture_target, miplevel, texture, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateFromGLTexture(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_texture(context, flags, target, 
miplevel, texture, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } /* XXX NULL function currently. */ cl_int clEnqueueAcquireGLObjects (cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; return err; } /* XXX NULL function currently. */ cl_int clEnqueueReleaseGLObjects (cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; return err; } Release_v0.3/src/cl_gt_device.h000066400000000000000000000057651223142177000165670ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */

/* Common fields for both all GT devices (IVB / SNB).
 *
 * This file is a list of designated initializers; it is pulled in with
 * #include from inside a device structure initializer (see
 * cl_gen7_device.h / cl_gen75_device.h), so it must contain nothing but
 * ".field = value," entries and the helper macro below. */
.device_type = CL_DEVICE_TYPE_GPU,
.vendor_id = 0, /* == device_id (set when requested) */
.max_work_item_dimensions = 3,
.preferred_vector_width_char = 16,
.preferred_vector_width_short = 16,
.preferred_vector_width_int = 16,
.preferred_vector_width_long = 16,
.preferred_vector_width_float = 16,
.preferred_vector_width_double = 0,
.preferred_vector_width_half = 0,
.native_vector_width_char = 16,
.native_vector_width_short = 16,
.native_vector_width_int = 16,
.native_vector_width_long = 16,
.native_vector_width_float = 16,
.native_vector_width_double = 16,
.native_vector_width_half = 16,
.address_bits = 32,
.max_mem_alloc_size = 128 * 1024 * 1024,
.image_support = CL_TRUE,
.max_read_image_args = 0,
.max_write_image_args = 0,
.image2d_max_width = 8192,
.image2d_max_height = 8192,
.image3d_max_width = 8192,
.image3d_max_height = 8192,
.image3d_max_depth = 2048,
.max_samplers = 8,
.mem_base_addr_align = sizeof(cl_uint) * 8,
.min_data_type_align_size = sizeof(cl_uint),
/* single_fp_config used to be initialized here with 0 and a second time
 * further down; only the later initializer takes effect, so the dead one
 * has been removed. */
.global_mem_cache_type = CL_READ_WRITE_CACHE,
.global_mem_size = 128 * 1024 * 1024,
.max_constant_buffer_size = 512 << 10,
.max_constant_args = 8,
.error_correction_support = CL_FALSE,
.host_unified_memory = CL_FALSE,
.profiling_timer_resolution = 80, /* ns */
.endian_little = CL_TRUE,
.available = CL_TRUE,
.compiler_available = CL_FALSE, /* XXX */
.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
.queue_properties = CL_QUEUE_PROFILING_ENABLE,
.platform = NULL, /* == intel_platform (set when requested) */
/* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
.single_fp_config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , /* IEEE 754. */

/* Initialize a string field together with its companion FIELD_sz field.
 * NOTE(review): sizeof(STRING) already counts the terminating NUL, so the
 * "+ 1" makes FIELD_sz one byte larger than the literal. If the consumer
 * copies FIELD_sz bytes from FIELD this reads past the literal - confirm
 * against the code that uses the *_sz fields before changing it. */
#define DECL_INFO_STRING(FIELD, STRING) \
    .FIELD = STRING, \
    .JOIN(FIELD,_sz) = sizeof(STRING) + 1,
DECL_INFO_STRING(name, "Intel HD Graphics Family")
DECL_INFO_STRING(vendor, "Intel")
DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
DECL_INFO_STRING(profile, "FULL_PROFILE")
DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING)
DECL_INFO_STRING(extensions, "")
DECL_INFO_STRING(built_in_kernels, "")
DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
#undef DECL_INFO_STRING
Release_v0.3/src/cl_image.c000066400000000000000000000210121223142177000156720ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see .
* * Author: Benjamin Segovia */ #include "cl_image.h" #include "cl_utils.h" #include "intel/intel_defines.h" #include LOCAL cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp) { assert(bpp); const uint32_t type = fmt->image_channel_data_type; const uint32_t order = fmt->image_channel_order; switch (type) { #define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE; DECL_BPP(CL_SNORM_INT8, 1); break; DECL_BPP(CL_SNORM_INT16, 2); break; DECL_BPP(CL_UNORM_INT8, 1); break; DECL_BPP(CL_UNORM_INT16, 2); break; DECL_BPP(CL_UNORM_SHORT_565, 2); if (order != CL_RGBx && order != CL_RGB) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; DECL_BPP(CL_UNORM_SHORT_555, 2); if (order != CL_RGBx && order != CL_RGB) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; DECL_BPP(CL_UNORM_INT_101010, 4); if (order != CL_RGBx && order != CL_RGB) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; DECL_BPP(CL_SIGNED_INT8, 1); break; DECL_BPP(CL_SIGNED_INT16, 2); break; DECL_BPP(CL_SIGNED_INT32, 4); break; DECL_BPP(CL_UNSIGNED_INT8, 1); break; DECL_BPP(CL_UNSIGNED_INT16, 2); break; DECL_BPP(CL_UNSIGNED_INT32, 4); break; DECL_BPP(CL_HALF_FLOAT, 2); break; DECL_BPP(CL_FLOAT, 4); break; #undef DECL_BPP default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; }; switch (order) { case CL_Rx: break; case CL_R: break; case CL_A: break; case CL_RA: *bpp *= 2; break; case CL_RG: *bpp *= 2; break; case CL_INTENSITY: case CL_LUMINANCE: if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 && type != CL_SNORM_INT8 && type != CL_SNORM_INT16 && type != CL_HALF_FLOAT && type != CL_FLOAT) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; case CL_RGB: case CL_RGBx: if (type != CL_UNORM_SHORT_555 && type != CL_UNORM_SHORT_565 && type != CL_UNORM_INT_101010) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; case CL_RGBA: *bpp *= 4; break; case CL_ARGB: case CL_BGRA: if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 && type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8) return 
CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; *bpp *= 4; break; default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; }; return CL_SUCCESS; } LOCAL uint32_t cl_image_get_intel_format(const cl_image_format *fmt) { const uint32_t type = fmt->image_channel_data_type; const uint32_t order = fmt->image_channel_order; switch (order) { case CL_R: #if 0 case CL_Rx: case CL_A: case CL_INTENSITY: case CL_LUMINANCE: if ((order == CL_INTENSITY || order == CL_LUMINANCE) && (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 && type != CL_SNORM_INT8 && type != CL_SNORM_INT16 && type != CL_HALF_FLOAT && type != CL_FLOAT)) return INTEL_UNSUPPORTED_FORMAT; #endif /* XXX it seems we have some acuracy compatible issue with snomr_int8/16, * have to disable those formats currently. */ switch (type) { case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16_FLOAT; case CL_FLOAT: return I965_SURFACEFORMAT_R32_FLOAT; // case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16_SNORM; // case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8_SNORM; case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8_UNORM; case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16_UNORM; case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8_SINT; case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16_SINT; case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32_SINT; case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8_UINT; case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16_UINT; case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32_UINT; default: return INTEL_UNSUPPORTED_FORMAT; }; #if 0 case CL_RG: case CL_RA: switch (type) { case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16_FLOAT; case CL_FLOAT: return I965_SURFACEFORMAT_R32G32_FLOAT; case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16_SNORM; case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8_SNORM; case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8_UNORM; case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16_UNORM; case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8_SINT; case CL_SIGNED_INT16: return 
I965_SURFACEFORMAT_R16G16_SINT; case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32_SINT; case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8_UINT; case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT; case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32_UINT; default: return INTEL_UNSUPPORTED_FORMAT; }; case CL_RGB: case CL_RGBx: switch (type) { case CL_UNORM_INT_101010: return I965_SURFACEFORMAT_R10G10B10A2_UNORM; case CL_UNORM_SHORT_565: case CL_UNORM_SHORT_555: default: return INTEL_UNSUPPORTED_FORMAT; }; #endif case CL_RGBA: switch (type) { case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16B16A16_FLOAT; case CL_FLOAT: return I965_SURFACEFORMAT_R32G32B32A32_FLOAT; // case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SNORM; // case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SNORM; case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UNORM; case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UNORM; case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SINT; case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SINT; case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_SINT; case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UINT; case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UINT; case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_UINT; default: return INTEL_UNSUPPORTED_FORMAT; }; case CL_ARGB: return INTEL_UNSUPPORTED_FORMAT; case CL_BGRA: switch (type) { case CL_UNORM_INT8: return I965_SURFACEFORMAT_B8G8R8A8_UNORM; default: return INTEL_UNSUPPORTED_FORMAT; }; default: return INTEL_UNSUPPORTED_FORMAT; }; } static const uint32_t cl_image_order[] = { CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB, CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx }; static const uint32_t cl_image_type[] = { CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, 
CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT }; static const size_t cl_image_order_n = SIZEOF32(cl_image_order); static const size_t cl_image_type_n = SIZEOF32(cl_image_type); cl_int cl_image_get_supported_fmt(cl_context ctx, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format *image_formats, cl_uint *num_image_formats) { size_t i, j, n = 0; for (i = 0; i < cl_image_order_n; ++i) for (j = 0; j < cl_image_type_n; ++j) { const cl_image_format fmt = { .image_channel_order = cl_image_order[i], .image_channel_data_type = cl_image_type[j] }; const uint32_t intel_fmt = cl_image_get_intel_format(&fmt); if (intel_fmt == INTEL_UNSUPPORTED_FORMAT) continue; if (n < num_entries && image_formats) image_formats[n] = fmt; n++; } if (num_image_formats) *num_image_formats = n; return CL_SUCCESS; } Release_v0.3/src/cl_image.h000066400000000000000000000032421223142177000157040ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_IMAGE_H__ #define __CL_IMAGE_H__ #include "cl_internals.h" #include "CL/cl.h" #include /* Returned when the OCL format is not supported */ #define INTEL_UNSUPPORTED_FORMAT ((uint32_t) ~0x0u) /* Compute the number of bytes per pixel if the format is supported */ extern cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp); /* Return the intel format for the given OCL format */ extern uint32_t cl_image_get_intel_format(const cl_image_format *fmt); /* Return the list of formats supported by the API */ extern cl_int cl_image_get_supported_fmt(cl_context context, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format *image_formats, cl_uint *num_image_formats); #endif /* __CL_IMAGE_H__ */ Release_v0.3/src/cl_internals.h000066400000000000000000000026341223142177000166250ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_INTERNALS_H__ #define __CL_INTERNALS_H__ /* We put a header to identify each object. 
This will make the programmer life * easy if objects are wrongly used in the API */ #define CL_MAGIC_KERNEL_HEADER 0x1234567890abcdefLL #define CL_MAGIC_CONTEXT_HEADER 0x0ab123456789cdefLL #define CL_MAGIC_PROGRAM_HEADER 0x34560ab12789cdefLL #define CL_MAGIC_QUEUE_HEADER 0x83650a12b79ce4dfLL #define CL_MAGIC_SAMPLER_HEADER 0x686a0ecba79ce33fLL #define CL_MAGIC_EVENT_HEADER 0x8324a9c810ebf90fLL #define CL_MAGIC_MEM_HEADER 0x381a27b9ce6504dfLL #define CL_MAGIC_DEAD_HEADER 0xdeaddeaddeaddeadLL #endif /* __CL_INTERNALS_H__ */ Release_v0.3/src/cl_kernel.c000066400000000000000000000220001223142177000160660ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_kernel.h" #include "cl_program.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_mem.h" #include "cl_alloc.h" #include "cl_utils.h" #include "cl_khr_icd.h" #include "CL/cl.h" #include "cl_sampler.h" #include #include #include #include #include

/* Drop one reference on the kernel and destroy it when the reference that
 * was dropped was the last one. Accepts NULL. */
LOCAL void
cl_kernel_delete(cl_kernel k)
{
  uint32_t i;
  if (k == NULL) return;

  /* We are not done with the kernel */
  if (atomic_dec(&k->ref_n) > 1) return;
  /* Release one reference on all bos we own */
  if (k->bo)       cl_buffer_unreference(k->bo);
  if (k->const_bo) cl_buffer_unreference(k->const_bo);

  /* This will be true for kernels created by clCreateKernel */
  if (k->ref_its_program) cl_program_delete(k->program);
  /* Release the curbe if allocated */
  if (k->curbe) cl_free(k->curbe);
  /* Release the argument array if required */
  if (k->args) {
    for (i = 0; i < k->arg_n; ++i)
      if (k->args[i].mem != NULL)
        cl_mem_delete(k->args[i].mem);
    cl_free(k->args);
  }
  if (k->image_sz)
    cl_free(k->images);
  k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
  cl_free(k);
}

/* Allocate an empty kernel that belongs to program p. The new kernel holds
 * one reference (ref_n == 1) and does not retain the program. Returns NULL
 * if the allocation fails. */
LOCAL cl_kernel
cl_kernel_new(cl_program p)
{
  cl_kernel k = NULL;
  TRY_ALLOC_NO_ERR (k, CALLOC(struct _cl_kernel));
  SET_ICD(k->dispatch)
  k->ref_n = 1;
  k->magic = CL_MAGIC_KERNEL_HEADER;
  k->program = p;

exit:
  return k;
error:
  cl_kernel_delete(k);
  k = NULL;
  goto exit;
}

/* Return the kernel name as reported by the compiler back end, or NULL
 * when k is NULL. */
LOCAL const char*
cl_kernel_get_name(cl_kernel k)
{
  if (UNLIKELY(k == NULL)) return NULL;
  return gbe_kernel_get_name(k->opaque);
}

/* Take one more reference on the kernel. */
LOCAL void
cl_kernel_add_ref(cl_kernel k)
{
  atomic_inc(&k->ref_n);
}

/* Set argument `index` of kernel k.
 *
 *  - sz    : size of the argument as given by the user. For every argument
 *            kind except __local pointers it must match the size reported
 *            by the compiler; for __local pointers it is the amount of
 *            local memory and must not be 0.
 *  - value : pointer to the argument value. Must be NULL for __local
 *            pointers, must not be NULL for plain values, samplers and
 *            images, and may be NULL for __global / __constant pointers.
 *
 * Returns CL_SUCCESS or one of CL_INVALID_ARG_INDEX, CL_INVALID_ARG_SIZE,
 * CL_INVALID_ARG_VALUE, CL_INVALID_SAMPLER, CL_INVALID_MEM_OBJECT. */
LOCAL cl_int
cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
{
  uint32_t offset;            /* where to patch */
  enum gbe_arg_type arg_type; /* kind of argument */
  size_t arg_sz;              /* size of the argument */
  cl_mem mem;                 /* for __global, __constant and image arguments */

  if (UNLIKELY(index >= k->arg_n))
    return CL_INVALID_ARG_INDEX;
  arg_type = gbe_kernel_get_arg_type(k->opaque, index);
  arg_sz = gbe_kernel_get_arg_size(k->opaque, index);

  /* ---- Validation ---- */
  if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz))
    return CL_INVALID_ARG_SIZE;
  if(UNLIKELY(arg_type == GBE_ARG_LOCAL_PTR && sz == 0))
    return CL_INVALID_ARG_SIZE;

  if(arg_type == GBE_ARG_VALUE) {
    if(UNLIKELY(value == NULL))
      return CL_INVALID_ARG_VALUE;
  } else if(arg_type == GBE_ARG_LOCAL_PTR) {
    if(UNLIKELY(value != NULL))
      return CL_INVALID_ARG_VALUE;
  } else if(arg_type == GBE_ARG_SAMPLER) {
    if (UNLIKELY(value == NULL))
      return CL_INVALID_ARG_VALUE;

    cl_sampler s = *(cl_sampler*)value;
    /* A NULL handle must be rejected before looking at its header. */
    if(UNLIKELY(s == NULL || s->magic != CL_MAGIC_SAMPLER_HEADER))
      return CL_INVALID_SAMPLER;
  } else {
    // should be image, GLOBAL_PTR, CONSTANT_PTR
    if (UNLIKELY(value == NULL && arg_type == GBE_ARG_IMAGE))
      return CL_INVALID_ARG_VALUE;
    if(value != NULL) {
      mem = *(cl_mem*)value;
      /* A NULL handle must be rejected before looking at its header. */
      if (UNLIKELY(mem == NULL || mem->magic != CL_MAGIC_MEM_HEADER))
        return CL_INVALID_MEM_OBJECT;

      if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !IS_IMAGE(mem))
         || (arg_type != GBE_ARG_IMAGE && IS_IMAGE(mem))))
        return CL_INVALID_ARG_VALUE;
    }
  }

  /* ---- Commit ---- */

  /* Copy the structure or the value directly into the curbe */
  if (arg_type == GBE_ARG_VALUE) {
    offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
    assert(offset + sz <= k->curbe_sz);
    memcpy(k->curbe + offset, value, sz);
    k->args[index].local_sz = 0;
    k->args[index].is_set = 1;
    k->args[index].mem = NULL;
    return CL_SUCCESS;
  }

  /* For a local pointer just save the size */
  if (arg_type == GBE_ARG_LOCAL_PTR) {
    k->args[index].local_sz = sz;
    k->args[index].is_set = 1;
    k->args[index].mem = NULL;
    return CL_SUCCESS;
  }

  /* Is it a sampler*/
  if (arg_type == GBE_ARG_SAMPLER) {
    cl_sampler sampler;
    memcpy(&sampler, value, sz);
    k->args[index].local_sz = 0;
    k->args[index].is_set = 1;
    k->args[index].mem = NULL;
    k->args[index].sampler = sampler;
    cl_set_sampler_arg_slot(k, index, sampler);
    return CL_SUCCESS;
  }

  if(value == NULL) {
    /* for buffer object GLOBAL_PTR CONSTANT_PTR, it maybe NULL */
    int32_t ptr_offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
    *((uint32_t *)(k->curbe + ptr_offset)) = 0;
    assert(arg_type == GBE_ARG_GLOBAL_PTR || arg_type == GBE_ARG_CONSTANT_PTR);

    /* Drop the buffer that was bound to this slot before, if any. */
    if (k->args[index].mem)
      cl_mem_delete(k->args[index].mem);
    k->args[index].mem = NULL;
    k->args[index].is_set = 1;
    k->args[index].local_sz = 0;
    return CL_SUCCESS;
  }

  /* Buffer or image: retain the new object first, then release the one
   * that was bound before (safe even when both are the same object). */
  mem = *(cl_mem*) value;

  cl_mem_add_ref(mem);
  if (k->args[index].mem)
    cl_mem_delete(k->args[index].mem);
  k->args[index].mem = mem;
  k->args[index].is_set = 1;
  k->args[index].local_sz = 0;

  return CL_SUCCESS;
}

/* Return the SIMD width reported by the compiler back end for this kernel. */
LOCAL uint32_t
cl_kernel_get_simd_width(cl_kernel k)
{
  assert(k != NULL);
  return gbe_kernel_get_simd_width(k->opaque);
}

/* Bind the compiler kernel `opaque` to k: allocate and upload the code
 * buffer and cache the argument count, curbe size, sampler data and image
 * data. If the image array cannot be allocated the code buffer is released
 * again and k->bo is reset to NULL. */
LOCAL void
cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
{
  cl_context ctx = k->program->ctx;
  cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);

  if(k->bo != NULL)
    cl_buffer_unreference(k->bo);

  /* Allocate the gen code here */
  const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
  const char *code = gbe_kernel_get_code(opaque);
  k->bo = cl_buffer_alloc(bufmgr, "CL kernel", code_sz, 64u);
  k->arg_n = gbe_kernel_get_arg_num(opaque);

  /* Upload the code */
  cl_buffer_subdata(k->bo, 0, code_sz, code);
  k->opaque = opaque;

  /* Create the curbe */
  k->curbe_sz = gbe_kernel_get_curbe_size(k->opaque);

  /* Get sampler data & size */
  k->sampler_sz = gbe_kernel_get_sampler_size(k->opaque);
  assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
  if (k->sampler_sz > 0)
    gbe_kernel_get_sampler_data(k->opaque, k->samplers);
  /* Get image data & size */
  k->image_sz = gbe_kernel_get_image_size(k->opaque);
  /* This bound is about the images; it used to test sampler_sz again. */
  assert(k->image_sz <= GEN_MAX_SURFACES);
  if (k->image_sz > 0) {
    TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0])));
    gbe_kernel_get_image_data(k->opaque, k->images);
  } else
    k->images = NULL;
  return;
error:
  cl_buffer_unreference(k->bo);
  k->bo = NULL;
}

/* Duplicate kernel `from` for the user (clCreateKernel). The copy shares
 * the code / constant buffers and the compiler kernel with the original,
 * gets its own zeroed curbe and argument array, and holds a reference on
 * the program. Returns NULL when `from` is NULL or an allocation fails. */
LOCAL cl_kernel
cl_kernel_dup(cl_kernel from)
{
  cl_kernel to = NULL;

  if (UNLIKELY(from == NULL))
    return NULL;
  TRY_ALLOC_NO_ERR (to, CALLOC(struct _cl_kernel));
  SET_ICD(to->dispatch)
  to->bo = from->bo;
  to->const_bo = from->const_bo;
  /* Retain the bos as soon as we store them. The error path below goes
   * through cl_kernel_delete(to), which unreferences both buffers; taking
   * the references only at the end (as before) made a failed allocation
   * release buffers this kernel never retained. */
  if (to->bo)       cl_buffer_reference(to->bo);
  if (to->const_bo) cl_buffer_reference(to->const_bo);
  to->opaque = from->opaque;
  to->ref_n = 1;
  to->magic = CL_MAGIC_KERNEL_HEADER;
  to->program = from->program;
  to->arg_n = from->arg_n;
  to->curbe_sz = from->curbe_sz;
  to->sampler_sz = from->sampler_sz;
  to->image_sz = from->image_sz;
  if (to->sampler_sz)
    memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
  if (to->image_sz) {
    TRY_ALLOC_NO_ERR(to->images, cl_calloc(to->image_sz, sizeof(to->images[0])));
    memcpy(to->images, from->images, to->image_sz * sizeof(to->images[0]));
  } else
    to->images = NULL;
  TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
  if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));

  /* We retain the program destruction since this kernel (user allocated)
   * depends on the program for some of its pointers
   */
  assert(from->program);
  cl_program_add_ref(from->program);
  to->ref_its_program = CL_TRUE;

exit:
  return to;
error:
  cl_kernel_delete(to);
  to = NULL;
  goto exit;
}

/* Check the user provided local work size against the kernel and the
 * device and compute the work group size (product of the local sizes).
 *
 * Returns CL_INVALID_WORK_ITEM_SIZE when a dimension differs from a
 * non-zero required work group size of the kernel, or when the product
 * exceeds the device max_work_group_size. *wk_grp_sz (if not NULL) is
 * written in every case: 0 when a required size mismatched, the computed
 * product otherwise.
 * NOTE(review): local_wk_sz[0] is read unconditionally, so this assumes
 * wk_dim >= 1 and local_wk_sz != NULL - confirm the callers guarantee it. */
LOCAL cl_int
cl_kernel_work_group_sz(cl_kernel ker,
                        const size_t *local_wk_sz,
                        uint32_t wk_dim,
                        size_t *wk_grp_sz)
{
  cl_int err = CL_SUCCESS;
  size_t sz = 0;
  cl_uint i;

  for (i = 0; i < wk_dim; ++i) {
    const uint32_t required_sz = gbe_kernel_get_required_work_group_size(ker->opaque, i);
    if (required_sz != 0 && required_sz != local_wk_sz[i]) {
      err = CL_INVALID_WORK_ITEM_SIZE;
      goto error;
    }
  }
  sz = local_wk_sz[0];
  for (i = 1; i < wk_dim; ++i)
    sz *= local_wk_sz[i];

  if (sz > ker->program->ctx->device->max_work_group_size) {
    err = CL_INVALID_WORK_ITEM_SIZE;
    goto error;
  }

  /* Reached on success as well as on failure. */
error:
  if (wk_grp_sz) *wk_grp_sz = sz;
  return err;
}
Release_v0.3/src/cl_kernel.h000066400000000000000000000073351223142177000161110ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation;
/* One OCL function.
 *
 * The object is reference counted through ref_n. The visible part of
 * cl_kernel_dup() shows how a copy is built: scalar sizes and the sampler /
 * image tables are copied, args and curbe are freshly allocated, and bo,
 * const_bo and program each get one more reference.
 */
struct _cl_kernel {
  DEFINE_ICD(dispatch)        /* ICD dispatch pointer; expands to nothing without HAS_OCLIcd */
  uint64_t magic;             /* To identify it as a kernel */
  volatile int ref_n;         /* We reference count this object */
  cl_buffer bo;               /* The code itself */
  cl_buffer const_bo;         /* Buffer for all __constants values in the OCL program */
  cl_program program;         /* Owns this structure (and pointers) */
  gbe_kernel opaque;          /* (Opaque) compiler structure for the OCL kernel */
  char *curbe;                /* One curbe per kernel; curbe_sz bytes when allocated */
  size_t curbe_sz;            /* Size of it, in bytes */
  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
  size_t sampler_sz;          /* number of valid entries in samplers[] */
  struct ImageInfo *images;   /* images defined in kernel args; NULL when image_sz is 0 */
  size_t image_sz;            /* image count in kernel args */
  cl_argument *args;          /* To track argument setting; arg_n entries */
  uint32_t arg_n:31;          /* Number of arguments */
  uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
};
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ #include #include "cl_platform_id.h" /* The interop functions are not implemented in Beignet */ #define CL_GL_INTEROP(x) NULL /* OpenCL 1.2 is not implemented in Beignet */ #define CL_1_2_NOTYET(x) NULL /** Return platform list through ICD interface * This code is used only if a client is linked directly against the library * instead of using the ICD loader. In this case, no other implementations * should exist in the process address space, so the call is equivalent to * clGetPlatformIDs(). * * @param[in] num_entries Number of entries allocated in return buffer * @param[out] platforms Platform identifiers supported by this implementation * @param[out] num_platforms Number of platform identifiers returned * @return OpenCL error code * @retval CL_SUCCESS Successful execution * @retval CL_PLATFORM_NOT_FOUND_KHR No platforms provided * @retval CL_INVALID_VALUE Invalid parameters */ cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { return clGetPlatformIDs(num_entries, platforms, num_platforms); } struct _cl_icd_dispatch const cl_khr_icd_dispatch = { clGetPlatformIDs, clGetPlatformInfo, clGetDeviceIDs, clGetDeviceInfo, clCreateContext, clCreateContextFromType, clRetainContext, clReleaseContext, clGetContextInfo, clCreateCommandQueue, clRetainCommandQueue, clReleaseCommandQueue, clGetCommandQueueInfo, (void *) NULL, /* clSetCommandQueueProperty */ clCreateBuffer, clCreateImage2D, clCreateImage3D, clRetainMemObject, clReleaseMemObject, clGetSupportedImageFormats, clGetMemObjectInfo, clGetImageInfo, clCreateSampler, clRetainSampler, clReleaseSampler, clGetSamplerInfo, clCreateProgramWithSource, clCreateProgramWithBinary, clRetainProgram, clReleaseProgram, clBuildProgram, clUnloadCompiler, clGetProgramInfo, clGetProgramBuildInfo, 
clCreateKernel, clCreateKernelsInProgram, clRetainKernel, clReleaseKernel, clSetKernelArg, clGetKernelInfo, clGetKernelWorkGroupInfo, clWaitForEvents, clGetEventInfo, clRetainEvent, clReleaseEvent, clGetEventProfilingInfo, clFlush, clFinish, clEnqueueReadBuffer, clEnqueueWriteBuffer, clEnqueueCopyBuffer, clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapBuffer, clEnqueueMapImage, clEnqueueUnmapMemObject, clEnqueueNDRangeKernel, clEnqueueTask, clEnqueueNativeKernel, clEnqueueMarker, clEnqueueWaitForEvents, clEnqueueBarrier, clGetExtensionFunctionAddress, CL_GL_INTEROP(clCreateFromGLBuffer), CL_GL_INTEROP(clCreateFromGLTexture2D), CL_GL_INTEROP(clCreateFromGLTexture3D), CL_GL_INTEROP(clCreateFromGLRenderbuffer), CL_GL_INTEROP(clGetGLObjectInfo), CL_GL_INTEROP(clGetGLTextureInfo), CL_GL_INTEROP(clEnqueueAcquireGLObjects), CL_GL_INTEROP(clEnqueueReleaseGLObjects), CL_GL_INTEROP(clGetGLContextInfoKHR), (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, clSetEventCallback, clCreateSubBuffer, clSetMemObjectDestructorCallback, clCreateUserEvent, clSetUserEventStatus, clEnqueueReadBufferRect, clEnqueueWriteBufferRect, clEnqueueCopyBufferRect, CL_1_2_NOTYET(clCreateSubDevicesEXT), CL_1_2_NOTYET(clRetainDeviceEXT), CL_1_2_NOTYET(clReleaseDeviceEXT), #ifdef CL_VERSION_1_2 (void *) NULL, CL_1_2_NOTYET(clCreateSubDevices), CL_1_2_NOTYET(clRetainDevice), CL_1_2_NOTYET(clReleaseDevice), CL_1_2_NOTYET(clCreateImage), CL_1_2_NOTYET(clCreateProgramWithBuiltInKernels), CL_1_2_NOTYET(clCompileProgram), CL_1_2_NOTYET(clLinkProgram), CL_1_2_NOTYET(clUnloadPlatformCompiler), CL_1_2_NOTYET(clGetKernelArgInfo), CL_1_2_NOTYET(clEnqueueFillBuffer), CL_1_2_NOTYET(clEnqueueFillImage), CL_1_2_NOTYET(clEnqueueMigrateMemObjects), CL_1_2_NOTYET(clEnqueueMarkerWithWaitList), CL_1_2_NOTYET(clEnqueueBarrierWithWaitList), CL_1_2_NOTYET(clGetExtensionFunctionAddressForPlatform), 
CL_GL_INTEROP(clCreateFromGLTexture), (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL #endif }; Release_v0.3/src/cl_khr_icd.h000066400000000000000000000021471223142177000162300ustar00rootroot00000000000000/* * Copyright © 2013 Simon Richter * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ #ifndef __CL_KHR_ICD_H__ #define __CL_KHR_ICD_H__ #ifdef HAS_OCLIcd #define SET_ICD(dispatch) \ dispatch = &cl_khr_icd_dispatch; #define INIT_ICD(member) .member = &cl_khr_icd_dispatch, #define DEFINE_ICD(member) struct _cl_icd_dispatch const *member; extern struct _cl_icd_dispatch const cl_khr_icd_dispatch; #else #define SET_ICD(dispatch) #define INIT_ICD(member) #define DEFINE_ICD(member) #endif #endif Release_v0.3/src/cl_mem.c000066400000000000000000001117111223142177000153740ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_mem.h" #include "cl_image.h" #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_device_id.h" #include "cl_driver.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include "cl_command_queue.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include #include #include #define FIELD_SIZE(CASE,TYPE) \ case JOIN(CL_,CASE): \ if(param_value_size_ret) \ *param_value_size_ret = sizeof(TYPE); \ if(!param_value) \ return CL_SUCCESS; \ if(param_value_size < sizeof(TYPE)) \ return CL_INVALID_VALUE; \ break; #define CL_MEM_OBJECT_BUFFER 0x10F0 #define CL_MEM_OBJECT_IMAGE2D 0x10F1 #define CL_MEM_OBJECT_IMAGE3D 0x10F2 static cl_mem_object_type cl_get_mem_object_type(cl_mem mem) { switch (mem->type) { case CL_MEM_BUFFER_TYPE: return CL_MEM_OBJECT_BUFFER; case CL_MEM_IMAGE_TYPE: case CL_MEM_GL_IMAGE_TYPE: { struct _cl_mem_image *image = cl_mem_image(mem); if (image->depth == 1) return CL_MEM_OBJECT_IMAGE1D; else if (image->depth == 2) return CL_MEM_OBJECT_IMAGE2D; else if (image->depth == 3) return CL_MEM_OBJECT_IMAGE3D; } default: return CL_MEM_OBJECT_BUFFER; } } LOCAL cl_int cl_get_mem_object_info(cl_mem mem, cl_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { switch(param_name) { FIELD_SIZE(MEM_TYPE, cl_mem_object_type); FIELD_SIZE(MEM_FLAGS, cl_mem_flags); FIELD_SIZE(MEM_SIZE, size_t); FIELD_SIZE(MEM_HOST_PTR, void *); FIELD_SIZE(MEM_MAP_COUNT, cl_uint); FIELD_SIZE(MEM_REFERENCE_COUNT, cl_uint); FIELD_SIZE(MEM_CONTEXT, cl_context); FIELD_SIZE(MEM_ASSOCIATED_MEMOBJECT, cl_mem); FIELD_SIZE(MEM_OFFSET, 
size_t); default: return CL_INVALID_VALUE; } switch(param_name) { case CL_MEM_TYPE: *((cl_mem_object_type *)param_value) = cl_get_mem_object_type(mem); break; case CL_MEM_FLAGS: *((cl_mem_flags *)param_value) = mem->flags; break; case CL_MEM_SIZE: *((size_t *)param_value) = mem->size; break; case CL_MEM_HOST_PTR: *((size_t *)param_value) = (size_t)mem->host_ptr; break; case CL_MEM_MAP_COUNT: *((cl_uint *)param_value) = mem->map_ref; break; case CL_MEM_REFERENCE_COUNT: *((cl_uint *)param_value) = mem->ref_n; break; case CL_MEM_CONTEXT: *((cl_context *)param_value) = mem->ctx; break; // TODO: Need to implement sub buffer first. case CL_MEM_ASSOCIATED_MEMOBJECT: NOT_IMPLEMENTED; break; case CL_MEM_OFFSET: NOT_IMPLEMENTED; break; } return CL_SUCCESS; } LOCAL cl_int cl_get_image_info(cl_mem mem, cl_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { int err; CHECK_IMAGE(mem, image); switch(param_name) { FIELD_SIZE(IMAGE_FORMAT, cl_image_format); FIELD_SIZE(IMAGE_ELEMENT_SIZE, size_t); FIELD_SIZE(IMAGE_ROW_PITCH, size_t); FIELD_SIZE(IMAGE_SLICE_PITCH, size_t); FIELD_SIZE(IMAGE_WIDTH, size_t); FIELD_SIZE(IMAGE_HEIGHT, size_t); FIELD_SIZE(IMAGE_DEPTH, size_t); default: return CL_INVALID_VALUE; } switch(param_name) { case CL_IMAGE_FORMAT: *(cl_image_format *)param_value = image->fmt; break; case CL_IMAGE_ELEMENT_SIZE: *(size_t *)param_value = image->bpp; break; case CL_IMAGE_ROW_PITCH: *(size_t *)param_value = image->row_pitch; break; case CL_IMAGE_SLICE_PITCH: *(size_t *)param_value = image->slice_pitch; break; case CL_IMAGE_WIDTH: *(size_t *)param_value = image->w; break; case CL_IMAGE_HEIGHT: *(size_t *)param_value = image->h; break; case CL_IMAGE_DEPTH: *(size_t *)param_value = image->depth; break; } return CL_SUCCESS; error: return err; } #undef FIELD_SIZE LOCAL cl_mem cl_mem_allocate(enum cl_mem_type type, cl_context ctx, cl_mem_flags flags, size_t sz, cl_int is_tiled, cl_int *errcode) { cl_buffer_mgr bufmgr = NULL; 
cl_mem mem = NULL; cl_int err = CL_SUCCESS; size_t alignment = 64; cl_ulong max_mem_size; assert(ctx); if ((err = cl_get_device_info(ctx->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_size), &max_mem_size, NULL)) != CL_SUCCESS) { goto error; } if (UNLIKELY(sz > max_mem_size)) { err = CL_INVALID_BUFFER_SIZE; goto error; } /* Allocate and inialize the structure itself */ if (type == CL_MEM_IMAGE_TYPE) { struct _cl_mem_image *image = NULL; TRY_ALLOC (image, CALLOC(struct _cl_mem_image)); mem = &image->base; } else if (type == CL_MEM_GL_IMAGE_TYPE ) { struct _cl_mem_gl_image *gl_image = NULL; TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image)); mem = &gl_image->base.base; } else { struct _cl_mem_buffer *buffer = NULL; TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer)); mem = &buffer->base; } mem->type = type; SET_ICD(mem->dispatch) mem->ref_n = 1; mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; if (sz != 0) { /* Pinning will require stricter alignment rules */ if ((flags & CL_MEM_PINNABLE) || is_tiled) alignment = 4096; /* Allocate space in memory */ bufmgr = cl_context_get_bufmgr(ctx); assert(bufmgr); mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment); if (UNLIKELY(mem->bo == NULL)) { err = CL_MEM_OBJECT_ALLOCATION_FAILURE; goto error; } mem->size = sz; } cl_context_add_ref(ctx); mem->ctx = ctx; /* Append the buffer in the context buffer list */ pthread_mutex_lock(&ctx->buffer_lock); mem->next = ctx->buffers; if (ctx->buffers != NULL) ctx->buffers->prev = mem; ctx->buffers = mem; pthread_mutex_unlock(&ctx->buffer_lock); exit: if (errcode) *errcode = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL cl_mem cl_mem_new_buffer(cl_context ctx, cl_mem_flags flags, size_t sz, void *data, cl_int *errcode_ret) { /* Possible mem type combination: CL_MEM_ALLOC_HOST_PTR CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR CL_MEM_USE_HOST_PTR CL_MEM_COPY_HOST_PTR */ cl_int err = CL_SUCCESS; cl_mem mem = NULL; /* This flag is valid 
only if host_ptr is not NULL */ if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR) && data == NULL)) { err = CL_INVALID_HOST_PTR; goto error; } /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR are mutually exclusive. */ if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR && flags & CL_MEM_USE_HOST_PTR)) { err = CL_INVALID_HOST_PTR; goto error; } /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR are mutually exclusive. */ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && flags & CL_MEM_USE_HOST_PTR)) { err = CL_INVALID_HOST_PTR; goto error; } /* Create the buffer in video memory */ mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, &err); if (mem == NULL || err != CL_SUCCESS) goto error; /* Copy the data if required */ if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR) cl_buffer_subdata(mem->bo, 0, sz, data); if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR) mem->host_ptr = data; exit: if (errcode_ret) *errcode_ret = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } void cl_mem_copy_image_region(const size_t *origin, const size_t *region, void *dst, size_t dst_row_pitch, size_t dst_slice_pitch, const void *src, size_t src_row_pitch, size_t src_slice_pitch, const struct _cl_mem_image *image) { size_t offset = image->bpp * origin[0] + dst_row_pitch * origin[1] + dst_slice_pitch * origin[2]; dst = (char*)dst + offset; if (!origin[0] && region[0] == image->w && dst_row_pitch == src_row_pitch && (region[2] == 1 || (!origin[1] && region[1] == image->h && dst_slice_pitch == src_slice_pitch))) { memcpy(dst, src, region[2] == 1 ? 
src_row_pitch*region[1] : src_slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src_ptr = src; char* dst_ptr = dst; for (y = 0; y < region[1]; y++) { memcpy(dst_ptr, src_ptr, image->bpp*region[0]); src_ptr += src_row_pitch; dst_ptr += dst_row_pitch; } src = (char*)src + src_slice_pitch; dst = (char*)dst + dst_slice_pitch; } } } static void cl_mem_copy_image(struct _cl_mem_image *image, size_t row_pitch, size_t slice_pitch, void* host_ptr) { char* dst_ptr = cl_mem_map_auto((cl_mem)image); size_t origin[3] = {0, 0, 0}; size_t region[3] = {image->w, image->h, image->depth}; cl_mem_copy_image_region(origin, region, dst_ptr, image->row_pitch, image->slice_pitch, host_ptr, row_pitch, slice_pitch, image); cl_mem_unmap_auto((cl_mem)image); } static const uint32_t tile_sz = 4096; /* 4KB per tile */ static const uint32_t tilex_w = 512; /* tileX width in bytes */ static const uint32_t tilex_h = 8; /* tileX height in number of rows */ static const uint32_t tiley_w = 128; /* tileY width in bytes */ static const uint32_t tiley_h = 32; /* tileY height in number of rows */ static cl_mem _cl_mem_new_image(cl_context ctx, cl_mem_flags flags, const cl_image_format *fmt, const cl_mem_object_type image_type, size_t w, size_t h, size_t depth, size_t pitch, size_t slice_pitch, void *data, cl_int *errcode_ret) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h; cl_image_tiling_t tiling = CL_NO_TILE; /* Check flags consistency */ if (UNLIKELY((flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) && data == NULL)) { err = CL_INVALID_HOST_PTR; goto error; } /* Get the size of each pixel */ if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS)) goto error; /* Only a sub-set of the formats are supported */ intel_fmt = cl_image_get_intel_format(fmt); if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) { err = 
CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; goto error; } /* See if the user parameters match */ #define DO_IMAGE_ERROR \ do { \ err = CL_INVALID_IMAGE_SIZE; \ goto error; \ } while (0); if (UNLIKELY(w == 0)) DO_IMAGE_ERROR; if (UNLIKELY(h == 0)) DO_IMAGE_ERROR; if (image_type == CL_MEM_OBJECT_IMAGE2D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; /* Pick up tiling mode (we do only linear on SNB) */ if (cl_driver_get_ver(ctx->drv) != 6) tiling = CL_TILE_Y; depth = 1; } if (image_type == CL_MEM_OBJECT_IMAGE3D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; size_t min_slice_pitch = min_pitch * h; if (data && slice_pitch == 0) slice_pitch = min_slice_pitch; if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR; if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; /* Pick up tiling mode (we do only linear on SNB) */ if (cl_driver_get_ver(ctx->drv) != 6) tiling = CL_TILE_Y; } #undef DO_IMAGE_ERROR /* Tiling requires to align both pitch and height */ if (tiling == CL_NO_TILE) { aligned_pitch = w * bpp; aligned_h = h; } else if (tiling == CL_TILE_X) { aligned_pitch = ALIGN(w * bpp, tilex_w); aligned_h = ALIGN(h, tilex_h); } else if (tiling == CL_TILE_Y) { aligned_pitch = ALIGN(w * bpp, tiley_w); aligned_h = ALIGN(h, tiley_h); } sz = aligned_pitch * aligned_h * depth; mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err); if (mem == NULL 
|| err != CL_SUCCESS) goto error; cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch); aligned_slice_pitch = (image_type == CL_MEM_OBJECT_IMAGE1D || image_type == CL_MEM_OBJECT_IMAGE2D) ? 0 : aligned_pitch * ALIGN(h, 2); cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt, intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling, 0, 0, 0); /* Copy the data if required */ if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); if (flags & CL_MEM_USE_HOST_PTR) { mem->host_ptr = data; cl_mem_image(mem)->host_row_pitch = pitch; cl_mem_image(mem)->host_slice_pitch = slice_pitch; } } exit: if (errcode_ret) *errcode_ret = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL cl_mem cl_mem_new_image(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) { switch (image_desc->image_type) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE3D: return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, image_desc->image_width, image_desc->image_height, image_desc->image_depth, image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr, errcode_ret); case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE1D_BUFFER: NOT_IMPLEMENTED; break; case CL_MEM_OBJECT_BUFFER: default: assert(0); } return NULL; } LOCAL void cl_mem_delete(cl_mem mem) { cl_int i; if (UNLIKELY(mem == NULL)) return; if (atomic_dec(&mem->ref_n) > 1) return; #ifdef HAS_EGL if (UNLIKELY(IS_GL_IMAGE(mem))) { cl_mem_gl_delete(cl_mem_gl_image(mem)); } #endif if (LIKELY(mem->bo != NULL)) cl_buffer_unreference(mem->bo); /* Remove it from the list */ assert(mem->ctx); pthread_mutex_lock(&mem->ctx->buffer_lock); if (mem->prev) mem->prev->next = mem->next; if (mem->next) mem->next->prev = mem->prev; if (mem->prev == NULL && 
mem->next == NULL) mem->ctx->buffers = NULL; pthread_mutex_unlock(&mem->ctx->buffer_lock); cl_context_delete(mem->ctx); /* Someone still mapped, unmap */ if(mem->map_ref > 0) { assert(mem->mapped_ptr); for(i=0; imapped_ptr_sz; i++) { if(mem->mapped_ptr[i].ptr != NULL) { mem->map_ref--; cl_mem_unmap_gtt(mem); } } assert(mem->map_ref == 0); } if (mem->mapped_ptr) free(mem->mapped_ptr); if (mem->dstr_cb) { cl_mem_dstr_cb *cb = mem->dstr_cb; while (mem->dstr_cb) { cb = mem->dstr_cb; cb->pfn_notify(mem, cb->user_data); mem->dstr_cb = cb->next; free(cb); } } cl_free(mem); } LOCAL void cl_mem_add_ref(cl_mem mem) { assert(mem); atomic_inc(&mem->ref_n); } #define LOCAL_SZ_0 16 #define LOCAL_SZ_1 4 #define LOCAL_SZ_2 4 LOCAL cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, size_t src_offset, size_t dst_offset, size_t cb) { cl_int ret; cl_kernel ker; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {1,1,1}; /* We use one kernel to copy the data. The kernel is lazily created. 
*/ assert(src_buf->ctx == dst_buf->ctx); if ((cb % 4) || (src_offset % 4) || (dst_offset % 4)) { extern char cl_internal_copy_buf_align1_str[]; extern int cl_internal_copy_buf_align1_str_size; ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN1, cl_internal_copy_buf_align1_str, (size_t)cl_internal_copy_buf_align1_str_size, NULL); } else if ((cb % 16) || (src_offset % 16) || (dst_offset % 16)) { extern char cl_internal_copy_buf_align4_str[]; extern int cl_internal_copy_buf_align4_str_size; ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4, cl_internal_copy_buf_align4_str, (size_t)cl_internal_copy_buf_align4_str_size, NULL); cb = cb/4; src_offset = src_offset/4; dst_offset = dst_offset/4; } else { extern char cl_internal_copy_buf_align16_str[]; extern int cl_internal_copy_buf_align16_str_size; ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16, cl_internal_copy_buf_align16_str, (size_t)cl_internal_copy_buf_align16_str_size, NULL); cb = cb/16; src_offset = src_offset/4; dst_offset = dst_offset/4; } if (!ker) return CL_OUT_OF_RESOURCES; if (cb < LOCAL_SZ_0) { local_sz[0] = 1; } else { local_sz[0] = LOCAL_SZ_0; } global_sz[0] = ((cb + LOCAL_SZ_0 - 1)/LOCAL_SZ_0)*LOCAL_SZ_0; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf); cl_kernel_set_arg(ker, 1, sizeof(int), &src_offset); cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf); cl_kernel_set_arg(ker, 3, sizeof(int), &dst_offset); cl_kernel_set_arg(ker, 4, sizeof(int), &cb); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); return ret; } LOCAL cl_int cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, const size_t *src_origin, const size_t *dst_origin, const size_t *region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch) { cl_int ret; cl_kernel ker; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; 
size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_1}; if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; cl_int index = CL_ENQUEUE_COPY_BUFFER_RECT; cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; static const char *str_kernel = "kernel void __cl_cpy_buffer_rect ( \n" " global char* src, global char* dst, \n" " unsigned int region0, unsigned int region1, unsigned int region2, \n" " unsigned int src_offset, unsigned int dst_offset, \n" " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" " int i = get_global_id(0); \n" " int j = get_global_id(1); \n" " int k = get_global_id(2); \n" " if((i >= region0) || (j>= region1) || (k>=region2)) \n" " return; \n" " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" " dst[dst_offset] = src[src_offset]; \n" "}"; /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_buf->ctx == dst_buf->ctx); /* setup the kernel and run. 
*/ ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, NULL); if (!ker) return CL_OUT_OF_RESOURCES; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf); cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &dst_buf); cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion[0]); cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); cl_kernel_set_arg(ker, 5, sizeof(cl_int), &src_offset); cl_kernel_set_arg(ker, 6, sizeof(cl_int), &dst_offset); cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_row_pitch); cl_kernel_set_arg(ker, 8, sizeof(cl_int), &src_slice_pitch); cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_row_pitch); cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_slice_pitch); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); return ret; } LOCAL cl_int cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image, struct _cl_mem_image* dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region) { cl_int ret; cl_kernel ker; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; cl_int index = CL_ENQUEUE_COPY_IMAGE_0; char option[40] = ""; uint32_t fixupDataType; uint32_t savedIntelFmt; if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { strcat(option, "-D SRC_IMAGE_3D"); index += 1; } if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { strcat(option, " -D DST_IMAGE_3D"); index += 2; } switch (src_image->fmt.image_channel_data_type) { case CL_SNORM_INT8: case CL_UNORM_INT8: fixupDataType = CL_UNSIGNED_INT8; break; case CL_HALF_FLOAT: case CL_SNORM_INT16: case CL_UNORM_INT16: fixupDataType = 
CL_UNSIGNED_INT16; break; case CL_FLOAT: fixupDataType = CL_UNSIGNED_INT32; break; default: fixupDataType = 0; } if (fixupDataType) { cl_image_format fmt; if (src_image->fmt.image_channel_order != CL_BGRA) fmt.image_channel_order = src_image->fmt.image_channel_order; else fmt.image_channel_order = CL_RGBA; fmt.image_channel_data_type = fixupDataType; savedIntelFmt = src_image->intel_fmt; src_image->intel_fmt = cl_image_get_intel_format(&fmt); dst_image->intel_fmt = src_image->intel_fmt; } static const char *str_kernel = "#ifdef SRC_IMAGE_3D \n" " #define SRC_IMAGE_TYPE image3d_t \n" " #define SRC_COORD_TYPE int4 \n" "#else \n" " #define SRC_IMAGE_TYPE image2d_t \n" " #define SRC_COORD_TYPE int2 \n" "#endif \n" "#ifdef DST_IMAGE_3D \n" " #define DST_IMAGE_TYPE image3d_t \n" " #define DST_COORD_TYPE int4 \n" "#else \n" " #define DST_IMAGE_TYPE image2d_t \n" " #define DST_COORD_TYPE int2 \n" "#endif \n" "kernel void __cl_copy_image ( \n" " __read_only SRC_IMAGE_TYPE src_image, __write_only DST_IMAGE_TYPE dst_image, \n" " unsigned int region0, unsigned int region1, unsigned int region2, \n" " unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n" " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { \n" " int i = get_global_id(0); \n" " int j = get_global_id(1); \n" " int k = get_global_id(2); \n" " int4 color; \n" " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n" " SRC_COORD_TYPE src_coord; \n" " DST_COORD_TYPE dst_coord; \n" " if((i >= region0) || (j>= region1) || (k>=region2)) \n" " return; \n" " src_coord.x = src_origin0 + i; \n" " src_coord.y = src_origin1 + j; \n" "#ifdef SRC_IMAGE_3D \n" " src_coord.z = src_origin2 + k; \n" "#endif \n" " dst_coord.x = dst_origin0 + i; \n" " dst_coord.y = dst_origin1 + j; \n" "#ifdef DST_IMAGE_3D \n" " dst_coord.z = dst_origin2 + k; \n" "#endif \n" " color = read_imagei(src_image, sampler, src_coord); \n" " 
write_imagei(dst_image, dst_coord, color); \n" "}"; /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_image->base.ctx == dst_image->base.ctx); /* setup the kernel and run. */ ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); if (!ker) { ret = CL_OUT_OF_RESOURCES; goto fail; } cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image); cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &dst_image); cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion[0]); cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); cl_kernel_set_arg(ker, 5, sizeof(cl_int), &src_origin[0]); cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]); cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]); cl_kernel_set_arg(ker, 8, sizeof(cl_int), &dst_origin[0]); cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_origin[1]); cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); fail: if (fixupDataType) { src_image->intel_fmt = savedIntelFmt; dst_image->intel_fmt = savedIntelFmt; } return ret; } LOCAL cl_int cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, cl_mem buffer, const size_t *src_origin, const size_t dst_offset, const size_t *region) { cl_int ret; cl_kernel ker; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; cl_int index = CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0; char option[40] = ""; uint32_t intel_fmt, bpp; cl_image_format fmt; size_t origin0, region0; if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { strcat(option, "-D 
IMAGE_3D"); index += 1; } static const char *str_kernel = "#ifdef IMAGE_3D \n" " #define IMAGE_TYPE image3d_t \n" " #define COORD_TYPE int4 \n" "#else \n" " #define IMAGE_TYPE image2d_t \n" " #define COORD_TYPE int2 \n" "#endif \n" "kernel void __cl_copy_image_to_buffer ( \n" " __read_only IMAGE_TYPE image, global uchar* buffer, \n" " unsigned int region0, unsigned int region1, unsigned int region2, \n" " unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n" " unsigned int dst_offset) { \n" " int i = get_global_id(0); \n" " int j = get_global_id(1); \n" " int k = get_global_id(2); \n" " uint4 color; \n" " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n" " COORD_TYPE src_coord; \n" " if((i >= region0) || (j>= region1) || (k>=region2)) \n" " return; \n" " src_coord.x = src_origin0 + i; \n" " src_coord.y = src_origin1 + j; \n" "#ifdef IMAGE_3D \n" " src_coord.z = src_origin2 + k; \n" "#endif \n" " color = read_imageui(image, sampler, src_coord); \n" " dst_offset += (k * region1 + j) * region0 + i; \n" " buffer[dst_offset] = color.x; \n" "}"; /* We use one kernel to copy the data. The kernel is lazily created. */ assert(image->base.ctx == buffer->ctx); fmt.image_channel_order = CL_R; fmt.image_channel_data_type = CL_UNSIGNED_INT8; intel_fmt = image->intel_fmt; bpp = image->bpp; image->intel_fmt = cl_image_get_intel_format(&fmt); image->w = image->w * image->bpp; image->bpp = 1; region0 = region[0] * bpp; origin0 = src_origin[0] * bpp; global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; /* setup the kernel and run. 
*/ ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); if (!ker) { ret = CL_OUT_OF_RESOURCES; goto fail; } cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &image); cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &buffer); cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion0); cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin0); cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]); cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]); cl_kernel_set_arg(ker, 8, sizeof(cl_int), &dst_offset); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); fail: image->intel_fmt = intel_fmt; image->bpp = bpp; image->w = image->w / bpp; return ret; } LOCAL cl_int cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_mem_image* image, const size_t src_offset, const size_t *dst_origin, const size_t *region) { cl_int ret; cl_kernel ker; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; cl_int index = CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0; char option[40] = ""; uint32_t intel_fmt, bpp; cl_image_format fmt; size_t origin0, region0; if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { strcat(option, "-D IMAGE_3D"); index += 1; } static const char *str_kernel = "#ifdef IMAGE_3D \n" " #define IMAGE_TYPE image3d_t \n" " #define COORD_TYPE int4 \n" "#else \n" " #define IMAGE_TYPE image2d_t \n" " #define COORD_TYPE int2 \n" "#endif \n" "kernel void __cl_copy_image_to_buffer ( \n" " __read_only IMAGE_TYPE image, global uchar* buffer, \n" " unsigned int region0, unsigned 
int region1, unsigned int region2, \n" " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, \n" " unsigned int src_offset) { \n" " int i = get_global_id(0); \n" " int j = get_global_id(1); \n" " int k = get_global_id(2); \n" " uint4 color = (uint4)(0); \n" " COORD_TYPE dst_coord; \n" " if((i >= region0) || (j>= region1) || (k>=region2)) \n" " return; \n" " dst_coord.x = dst_origin0 + i; \n" " dst_coord.y = dst_origin1 + j; \n" "#ifdef IMAGE_3D \n" " dst_coord.z = dst_origin2 + k; \n" "#endif \n" " src_offset += (k * region1 + j) * region0 + i; \n" " color.x = buffer[src_offset]; \n" " write_imageui(image, dst_coord, color); \n" "}"; /* We use one kernel to copy the data. The kernel is lazily created. */ assert(image->base.ctx == buffer->ctx); fmt.image_channel_order = CL_R; fmt.image_channel_data_type = CL_UNSIGNED_INT8; intel_fmt = image->intel_fmt; bpp = image->bpp; image->intel_fmt = cl_image_get_intel_format(&fmt); image->w = image->w * image->bpp; image->bpp = 1; region0 = region[0] * bpp; origin0 = dst_origin[0] * bpp; global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; /* setup the kernel and run. 
*/ ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); if (!ker) return CL_OUT_OF_RESOURCES; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &image); cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &buffer); cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion0); cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin0); cl_kernel_set_arg(ker, 6, sizeof(cl_int), &dst_origin[1]); cl_kernel_set_arg(ker, 7, sizeof(cl_int), &dst_origin[2]); cl_kernel_set_arg(ker, 8, sizeof(cl_int), &src_offset); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); image->intel_fmt = intel_fmt; image->bpp = bpp; image->w = image->w / bpp; return ret; } LOCAL void* cl_mem_map(cl_mem mem) { cl_buffer_map(mem->bo, 1); assert(cl_buffer_get_virtual(mem->bo)); return cl_buffer_get_virtual(mem->bo); } LOCAL cl_int cl_mem_unmap(cl_mem mem) { cl_buffer_unmap(mem->bo); return CL_SUCCESS; } LOCAL void* cl_mem_map_gtt(cl_mem mem) { cl_buffer_map_gtt(mem->bo); assert(cl_buffer_get_virtual(mem->bo)); return cl_buffer_get_virtual(mem->bo); } LOCAL void * cl_mem_map_gtt_unsync(cl_mem mem) { cl_buffer_map_gtt_unsync(mem->bo); assert(cl_buffer_get_virtual(mem->bo)); return cl_buffer_get_virtual(mem->bo); } LOCAL cl_int cl_mem_unmap_gtt(cl_mem mem) { cl_buffer_unmap_gtt(mem->bo); return CL_SUCCESS; } LOCAL void* cl_mem_map_auto(cl_mem mem) { if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE) return cl_mem_map_gtt(mem); else return cl_mem_map(mem); } LOCAL cl_int cl_mem_unmap_auto(cl_mem mem) { if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE) cl_buffer_unmap_gtt(mem->bo); else cl_buffer_unmap(mem->bo); return CL_SUCCESS; } LOCAL cl_int cl_mem_pin(cl_mem mem) { assert(mem); if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0)) return CL_INVALID_MEM_OBJECT; cl_buffer_pin(mem->bo, 4096); return CL_SUCCESS; } LOCAL cl_int cl_mem_unpin(cl_mem mem) { assert(mem); 
if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0)) return CL_INVALID_MEM_OBJECT; cl_buffer_unpin(mem->bo); return CL_SUCCESS; } LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx, unsigned int bo_name, cl_int* errcode) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, &err); if (mem == NULL || err != CL_SUCCESS) goto error; size_t sz = 0; mem->bo = cl_buffer_get_buffer_from_libva(ctx, bo_name, &sz); mem->size = sz; exit: if (errcode) *errcode = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx, unsigned int bo_name, size_t offset, size_t width, size_t height, cl_image_format fmt, size_t row_pitch, cl_int *errcode) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; struct _cl_mem_image *image = NULL; uint32_t intel_fmt, bpp; intel_fmt = cl_image_get_intel_format(&fmt); if (intel_fmt == INTEL_UNSUPPORTED_FORMAT) { err = CL_IMAGE_FORMAT_NOT_SUPPORTED; goto error; } cl_image_byte_per_pixel(&fmt, &bpp); mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, &err); if (mem == NULL || err != CL_SUCCESS) { err = CL_OUT_OF_HOST_MEMORY; goto error; } image = cl_mem_image(mem); mem->bo = cl_buffer_get_image_from_libva(ctx, bo_name, image); image->w = width; image->h = height; image->image_type = CL_MEM_OBJECT_IMAGE2D; image->depth = 2; image->fmt = fmt; image->intel_fmt = intel_fmt; image->bpp = bpp; image->row_pitch = row_pitch; image->slice_pitch = 0; // NOTE: tiling of image is set in cl_buffer_get_image_from_libva(). 
image->tile_x = 0; image->tile_y = 0; image->offset = offset; exit: if (errcode) *errcode = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } Release_v0.3/src/cl_mem.h000066400000000000000000000220361223142177000154020ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_MEM_H__ #define __CL_MEM_H__ #include "cl_internals.h" #include "cl_driver_type.h" #include "CL/cl.h" #include "cl_khr_icd.h" #include #ifndef CL_VERSION_1_2 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 typedef struct _cl_image_desc { cl_mem_object_type image_type; size_t image_width; size_t image_height; size_t image_depth; size_t image_array_size; size_t image_row_pitch; size_t image_slice_pitch; cl_uint num_mip_levels; cl_uint num_samples; cl_mem buffer; } cl_image_desc; #endif typedef enum cl_image_tiling { CL_NO_TILE = 0, CL_TILE_X = 1, CL_TILE_Y = 2 } cl_image_tiling_t; typedef struct _cl_mapped_ptr { void * ptr; void * v_ptr; size_t size; }cl_mapped_ptr; typedef struct _cl_mem_dstr_cb { struct _cl_mem_dstr_cb * next; void (CL_CALLBACK *pfn_notify)(cl_mem memobj, void *user_data); void *user_data; }cl_mem_dstr_cb; /* Used for buffers and images */ enum cl_mem_type { 
CL_MEM_BUFFER_TYPE, CL_MEM_IMAGE_TYPE, CL_MEM_GL_IMAGE_TYPE, }; #define IS_IMAGE(mem) (mem->type >= CL_MEM_IMAGE_TYPE) #define IS_GL_IMAGE(mem) (mem->type == CL_MEM_GL_IMAGE_TYPE) typedef struct _cl_mem { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a memory object */ cl_mem prev, next; /* We chain the memory buffers together */ enum cl_mem_type type; volatile int ref_n; /* This object is reference counted */ cl_buffer bo; /* Data in GPU memory */ size_t size; /* original request size, not alignment size, used in constant buffer */ cl_context ctx; /* Context it belongs to */ cl_mem_flags flags; /* Flags specified at the creation time */ void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */ cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */ int mapped_ptr_sz; /* The array size of mapped_ptr. */ int map_ref; /* The mapped count. */ cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ } _cl_mem; struct _cl_mem_image { _cl_mem base; cl_image_format fmt; /* only for images */ uint32_t intel_fmt; /* format to provide in the surface state */ uint32_t bpp; /* number of bytes per pixel */ cl_mem_object_type image_type; /* only for images 1D/2D...*/ size_t w, h, depth; /* only for images (depth is only for 3D images) */ size_t row_pitch, slice_pitch; size_t host_row_pitch, host_slice_pitch; cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */ size_t tile_x, tile_y; /* tile offset, used for mipmap images. */ size_t offset; /* offset for dri_bo, used when it's reloc. 
*/ }; struct _cl_mem_gl_image { struct _cl_mem_image base; uint32_t target; int miplevel; uint32_t texture; }; inline static void cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h, cl_mem_object_type image_type, size_t depth, cl_image_format fmt, uint32_t intel_fmt, uint32_t bpp, size_t row_pitch, size_t slice_pitch, cl_image_tiling_t tiling, size_t tile_x, size_t tile_y, size_t offset) { image->w = w; image->h = h; image->image_type = image_type; image->depth = depth; image->fmt = fmt; image->intel_fmt = intel_fmt; image->bpp = bpp; image->row_pitch = row_pitch; image->slice_pitch = slice_pitch; image->tiling = tiling; image->tile_x = tile_x; image->tile_y = tile_y; image->offset = offset; } struct _cl_mem_buffer { _cl_mem base; size_t offset; }; inline static struct _cl_mem_image * cl_mem_image(cl_mem mem) { assert(IS_IMAGE(mem)); return (struct _cl_mem_image *)mem; } inline static struct _cl_mem_gl_image * cl_mem_gl_image(cl_mem mem) { assert(IS_GL_IMAGE(mem)); return (struct _cl_mem_gl_image*)mem; } inline static struct _cl_mem_buffer * cl_mem_buffer(cl_mem mem) { assert(!IS_IMAGE(mem)); return (struct _cl_mem_buffer *)mem; } /* Query information about a memory object */ extern cl_int cl_get_mem_object_info(cl_mem, cl_mem_info, size_t, void *, size_t *); /* Query information about an image */ extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t *); /* Create a new memory object and initialize it with possible user data */ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*); /* Idem but this is an image */ extern cl_mem cl_mem_new_image(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret); /* Unref the object and delete it if no more reference */ extern void cl_mem_delete(cl_mem); /* Destroy egl image. 
 */
extern void cl_mem_gl_delete(struct _cl_mem_gl_image *);

/* Add one more reference to this object */
extern void cl_mem_add_ref(cl_mem);

/* Helper behind clEnqueueCopyBuffer */
extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
              size_t src_offset, size_t dst_offset, size_t cb);

/* Helper behind clEnqueueCopyBufferRect */
extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
                                     const size_t *, const size_t *, const size_t *,
                                     size_t, size_t, size_t, size_t);

/* Helper behind clEnqueueCopyImage */
extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*,
                                       const size_t *, const size_t *, const size_t *);

/* Helper behind clEnqueueCopyImageToBuffer */
extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, struct _cl_mem_image*, cl_mem,
                                          const size_t *, const size_t, const size_t *);

/* Helper behind clEnqueueCopyBufferToImage */
extern cl_int cl_mem_copy_buffer_to_image(cl_command_queue, cl_mem, struct _cl_mem_image*,
                                          const size_t, const size_t *, const size_t *);

/* Directly map a memory object */
extern void *cl_mem_map(cl_mem);

/* Unmap a memory object */
extern cl_int cl_mem_unmap(cl_mem);

/* Directly map a memory object in GTT mode */
extern void *cl_mem_map_gtt(cl_mem);

/* Directly map a memory object in GTT mode, without waiting for the GPU to be idle */
extern void *cl_mem_map_gtt_unsync(cl_mem);

/* Unmap a memory object in GTT mode */
extern cl_int cl_mem_unmap_gtt(cl_mem);

/* Directly map a memory object - tiled images are mapped in GTT mode */
extern void *cl_mem_map_auto(cl_mem);

/* Unmap a memory object - tiled images are unmapped in GTT mode */
extern cl_int cl_mem_unmap_auto(cl_mem);

/* Pin/unpin the buffer in memory (you must be root) */
extern cl_int cl_mem_pin(cl_mem);
extern cl_int cl_mem_unpin(cl_mem);

/* Allocate a memory object of the given kind; the status is reported
   through `errcode` */
extern cl_mem
cl_mem_allocate(enum cl_mem_type type,
                cl_context ctx,
                cl_mem_flags flags,
                size_t sz,
                cl_int is_tiled,
                cl_int *errcode);

/* Copy the `region` of an image starting at `origin` between two memory
   layouts described by their row / slice pitches */
void
cl_mem_copy_image_region(const size_t *origin, const size_t *region,
                         void *dst, size_t dst_row_pitch, size_t dst_slice_pitch,
                         const void *src, size_t src_row_pitch, size_t src_slice_pitch,
                         const struct _cl_mem_image *image);

/* Wrap a libva buffer object (named `bo_name`) in a cl_mem buffer */
extern cl_mem cl_mem_new_libva_buffer(cl_context ctx,
                                      unsigned int bo_name,
                                      cl_int *errcode);

/* Wrap a libva surface (named `bo_name`) in a 2D cl_mem image */
extern cl_mem cl_mem_new_libva_image(cl_context ctx,
                                     unsigned int bo_name, size_t offset,
                                     size_t width, size_t height,
                                     cl_image_format fmt,
                                     size_t row_pitch,
                                     cl_int *errcode);
#endif /* __CL_MEM_H__ */

Release_v0.3/src/cl_mem_gl.c000066400000000000000000000052101223142177000160520ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Zhigang Gong */ #include #include #include #include #include #include #include "cl_mem.h" #include "cl_image.h" #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_device_id.h" #include "cl_driver.h" #include "cl_platform_id.h" #include "cl_mem_gl.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include "CL/cl_gl.h" LOCAL cl_mem cl_mem_new_gl_buffer(cl_context ctx, cl_mem_flags flags, GLuint buf_obj, cl_int *errcode_ret) { NOT_IMPLEMENTED; } LOCAL cl_mem cl_mem_new_gl_texture(cl_context ctx, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int *errcode_ret) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; /* Check flags consistency */ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR)) { err = CL_INVALID_ARG_VALUE; goto error; } mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, &err); if (mem == NULL || err != CL_SUCCESS) goto error; mem->bo = cl_buffer_alloc_from_texture(ctx, texture_target, miplevel, texture, cl_mem_image(mem)); if (UNLIKELY(mem->bo == NULL)) { err = CL_MEM_OBJECT_ALLOCATION_FAILURE; goto error; } cl_mem_gl_image(mem)->target = texture_target; cl_mem_gl_image(mem)->miplevel = miplevel; cl_mem_gl_image(mem)->texture = texture; exit: if (errcode_ret) *errcode_ret = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL void cl_mem_gl_delete(struct _cl_mem_gl_image *gl_image) { if (gl_image->base.base.bo != NULL) cl_buffer_release_from_texture(gl_image->base.base.ctx, gl_image->target, gl_image->miplevel, gl_image->texture); } Release_v0.3/src/cl_mem_gl.h000066400000000000000000000010471223142177000160630ustar00rootroot00000000000000#ifndef __CL_MEM_GL_H__ #define __CL_MEM_GL_H__ #include "cl_mem.h" cl_mem cl_mem_new_gl_buffer(cl_context ctx, cl_mem_flags flags, GLuint buf_obj, cl_int *errcode_ret); cl_mem cl_mem_new_gl_texture(cl_context ctx, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int *errcode_ret); #endif 
Release_v0.3/src/cl_platform_id.c000066400000000000000000000076361223142177000171300ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_internals.h" #include "cl_utils.h" #include "CL/cl.h" #include "CL/cl_ext.h" #include #include #define DECL_INFO_STRING(FIELD, STRING) \ .FIELD = STRING, \ .JOIN(FIELD,_sz) = sizeof(STRING), static struct _cl_platform_id intel_platform_data = { INIT_ICD(dispatch) DECL_INFO_STRING(profile, "FULL_PROFILE") DECL_INFO_STRING(version, LIBCL_VERSION_STRING) DECL_INFO_STRING(name, "Experiment Intel Gen OCL Driver") DECL_INFO_STRING(vendor, "Intel") DECL_INFO_STRING(icd_suffix_khr, "Intel") }; #undef DECL_INFO_STRING /* Intel platform (only GPU now) */ cl_platform_id const intel_platform = &intel_platform_data; LOCAL cl_int cl_get_platform_ids(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { if (num_platforms != NULL) *num_platforms = 1; cl_intel_platform_extension_init(intel_platform); /* Easy right now, only one platform is supported */ if(platforms) *platforms = intel_platform; intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1; return CL_SUCCESS; } #define DECL_FIELD(CASE,FIELD) \ case JOIN(CL_,CASE): \ if (param_value_size < intel_platform->JOIN(FIELD,_sz)) \ return CL_INVALID_VALUE; \ 
if (param_value_size_ret != NULL) \ *param_value_size_ret = intel_platform->JOIN(FIELD,_sz); \ memcpy(param_value, \ intel_platform->FIELD, \ intel_platform->JOIN(FIELD,_sz)); \ return CL_SUCCESS; #define GET_FIELD_SZ(CASE,FIELD) \ case JOIN(CL_,CASE): \ if (param_value_size_ret != NULL) \ *param_value_size_ret = intel_platform->JOIN(FIELD,_sz); \ return CL_SUCCESS; LOCAL cl_int cl_get_platform_info(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (param_value == NULL) { switch (param_name) { GET_FIELD_SZ (PLATFORM_PROFILE, profile); GET_FIELD_SZ (PLATFORM_VERSION, version); GET_FIELD_SZ (PLATFORM_NAME, name); GET_FIELD_SZ (PLATFORM_VENDOR, vendor); GET_FIELD_SZ (PLATFORM_EXTENSIONS, extensions); GET_FIELD_SZ (PLATFORM_ICD_SUFFIX_KHR, icd_suffix_khr); default: return CL_INVALID_VALUE; } } /* Fetch the platform inform */ switch (param_name) { DECL_FIELD (PLATFORM_PROFILE, profile); DECL_FIELD (PLATFORM_VERSION, version); DECL_FIELD (PLATFORM_NAME, name); DECL_FIELD (PLATFORM_VENDOR, vendor); DECL_FIELD (PLATFORM_EXTENSIONS, extensions); DECL_FIELD (PLATFORM_ICD_SUFFIX_KHR, icd_suffix_khr); default: return CL_INVALID_VALUE; } } #undef DECL_FIELD Release_v0.3/src/cl_platform_id.h000066400000000000000000000047041223142177000171260ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

#ifndef __CL_PLATFORM_ID_H__
#define __CL_PLATFORM_ID_H__

#include "cl_internals.h"
#include "cl_extensions.h"
#include "cl_khr_icd.h"
#include "CL/cl.h"

#include "src/OCLConfig.h"

/* Description of the platform: each information string is stored next to
   a `<name>_sz` member holding its size. */
struct _cl_platform_id {
  DEFINE_ICD(dispatch)
  const char *profile;
  const char *version;
  const char *name;
  const char *vendor;
  char *extensions;             /* not const: filled in at run time */
  const char *icd_suffix_khr;
  size_t profile_sz;
  size_t version_sz;
  size_t name_sz;
  size_t vendor_sz;
  size_t extensions_sz;
  size_t icd_suffix_khr_sz;
  struct cl_extensions *internal_extensions;
};

/* Platform implemented by this run-time */
extern cl_platform_id const intel_platform;

/* Return the valid platform */
extern cl_int cl_get_platform_ids(cl_uint num_entries,
                                  cl_platform_id * platforms,
                                  cl_uint * num_platforms);

/* Return information for the current platform */
extern cl_int cl_get_platform_info(cl_platform_id platform,
                                   cl_platform_info param_name,
                                   size_t param_value_size,
                                   void * param_value,
                                   size_t * param_value_size_ret);

/* _STR stringizes its argument; _JOINT builds the string "x.y".  Because
   _JOINT forwards its arguments to _STR, macro arguments are expanded
   before being stringized. */
#define _STR(x) #x
#define _JOINT(x, y) _STR(x) "." _STR(y)

/* Version strings assembled from the LIBCL_* version macros */
#define LIBCL_DRIVER_VERSION_STRING _JOINT(LIBCL_DRIVER_VERSION_MAJOR, LIBCL_DRIVER_VERSION_MINOR)
#define LIBCL_VERSION_STRING "OpenCL " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING
#define LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING

#endif /* __CL_PLATFORM_ID_H__ */

Release_v0.3/src/cl_program.c000066400000000000000000000231511223142177000162650ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_kernel.h" #include "cl_program.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_alloc.h" #include "cl_utils.h" #include "cl_khr_icd.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include #include #include #include #include static void cl_program_release_sources(cl_program p) { if (p->source) { cl_free(p->source); p->source = NULL; } } static void cl_program_release_binary(cl_program p) { if (p->binary) { cl_free(p->binary); p->binary = NULL; } } LOCAL void cl_program_delete(cl_program p) { uint32_t ref, i; if (p == NULL) return; /* We are not done with it yet */ if ((ref = atomic_dec(&p->ref_n)) > 1) return; /* Destroy the sources and binary if still allocated */ cl_program_release_sources(p); cl_program_release_binary(p); /* Release the build options. 
*/ if (p->build_opts) { cl_free(p->build_opts); p->build_opts = NULL; } /* Remove it from the list */ assert(p->ctx); pthread_mutex_lock(&p->ctx->program_lock); if (p->prev) p->prev->next = p->next; if (p->next) p->next->prev = p->prev; if (p->prev == NULL && p->next == NULL) p->ctx->programs = NULL; pthread_mutex_unlock(&p->ctx->program_lock); cl_free(p->bin); /* Free the blob */ for (i = 0; i < p->ker_n; ++i) /* Free the kernels */ cl_kernel_delete(p->ker[i]); cl_free(p->ker); /* Program belongs to their parent context */ cl_context_delete(p->ctx); /* Free the program as allocated by the compiler */ if (p->opaque) gbe_program_delete(p->opaque); p->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(p); } LOCAL cl_program cl_program_new(cl_context ctx) { cl_program p = NULL; /* Allocate the structure */ TRY_ALLOC_NO_ERR (p, CALLOC(struct _cl_program)); SET_ICD(p->dispatch) p->ref_n = 1; p->magic = CL_MAGIC_PROGRAM_HEADER; p->ctx = ctx; /* The queue also belongs to its context */ cl_context_add_ref(ctx); exit: return p; error: cl_program_delete(p); goto exit; } LOCAL void cl_program_add_ref(cl_program p) { assert(p); atomic_inc(&p->ref_n); } static cl_int cl_program_load_gen_program(cl_program p) { cl_int err = CL_SUCCESS; uint32_t i; assert(p->opaque != NULL); p->ker_n = gbe_program_get_kernel_num(p->opaque); /* Allocate the kernel array */ TRY_ALLOC (p->ker, CALLOC_ARRAY(cl_kernel, p->ker_n)); for (i = 0; i < p->ker_n; ++i) { const gbe_kernel opaque = gbe_program_get_kernel(p->opaque, i); assert(opaque != NULL); TRY_ALLOC (p->ker[i], cl_kernel_new(p)); cl_kernel_setup(p->ker[i], opaque); } error: return err; } LOCAL cl_program cl_program_create_from_binary(cl_context ctx, cl_uint num_devices, const cl_device_id * devices, const size_t * lengths, const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; assert(ctx); INVALID_DEVICE_IF (num_devices != 1); INVALID_DEVICE_IF (devices == 
NULL); INVALID_DEVICE_IF (devices[0] != ctx->device); INVALID_VALUE_IF (binaries == NULL); INVALID_VALUE_IF (lengths == NULL); if (binaries[0] == NULL) { err = CL_INVALID_VALUE; if (binary_status) binary_status[0] = CL_INVALID_VALUE; goto error; } if (lengths[0] == 0) { err = CL_INVALID_VALUE; if (binary_status) binary_status[0] = CL_INVALID_VALUE; goto error; } program = cl_program_new(ctx); // TODO: Need to check the binary format here to return CL_INVALID_BINARY. TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char))); memcpy(program->binary, binaries[0], lengths[0]); program->binary_sz = lengths[0]; program->source_type = FROM_BINARY; if (binary_status) binary_status[0] = CL_SUCCESS; exit: if (errcode_ret) *errcode_ret = err; return program; error: cl_program_delete(program); program = NULL; goto exit; return CL_SUCCESS; } LOCAL cl_program cl_program_create_from_llvm(cl_context ctx, cl_uint num_devices, const cl_device_id *devices, const char *file_name, cl_int *errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; assert(ctx); INVALID_DEVICE_IF (num_devices != 1); INVALID_DEVICE_IF (devices == NULL); INVALID_DEVICE_IF (devices[0] != ctx->device); INVALID_VALUE_IF (file_name == NULL); program = cl_program_new(ctx); program->opaque = gbe_program_new_from_llvm(file_name, 0, NULL, NULL); if (UNLIKELY(program->opaque == NULL)) { err = CL_INVALID_PROGRAM; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, program); program->source_type = FROM_LLVM; exit: if (errcode_ret) *errcode_ret = err; return program; error: cl_program_delete(program); program = NULL; goto exit; } LOCAL cl_program cl_program_create_from_source(cl_context ctx, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; cl_uint i; int32_t * lens = NULL; int32_t len_total = 0; assert(ctx); char * p = NULL; // the real compilation step will be done at build time since we do not 
have // yet the compilation options program = cl_program_new(ctx); TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t))); for (i = 0; i < (int) count; ++i) { size_t len; if (lengths == NULL || lengths[i] == 0) len = strlen(strings[i]); else len = lengths[i]; lens[i] = len; len_total += len; } TRY_ALLOC(program->source, cl_calloc(len_total+1, sizeof(char))); p = program->source; for (i = 0; i < (int) count; ++i) { memcpy(p, strings[i], lens[i]); p += lens[i]; } *p = '\0'; program->source_type = FROM_SOURCE; exit: cl_free(lens); lens = NULL; if (errcode_ret) *errcode_ret = err; return program; error: cl_program_delete(program); program = NULL; goto exit; } LOCAL cl_int cl_program_build(cl_program p, const char *options) { cl_int err = CL_SUCCESS; int i = 0; int copyed = 0; if (options) { if(p->build_opts) { cl_free(p->build_opts); p->build_opts = NULL; } TRY_ALLOC (p->build_opts, cl_calloc(strlen(options) + 1, sizeof(char))); memcpy(p->build_opts, options, strlen(options)); } if (p->source_type == FROM_SOURCE) { p->opaque = gbe_program_new_from_source(p->source, 0, options, NULL, NULL); if (UNLIKELY(p->opaque == NULL)) { err = CL_INVALID_PROGRAM; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, p); p->source_type = FROM_LLVM; } else if (p->source_type == FROM_BINARY) { p->opaque = gbe_program_new_from_binary(p->binary, p->binary_sz); if (UNLIKELY(p->opaque == NULL)) { err = CL_INVALID_PROGRAM; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, p); p->source_type = FROM_LLVM; } for (i = 0; i < p->ker_n; i ++) { const gbe_kernel opaque = gbe_program_get_kernel(p->opaque, i); p->bin_sz += gbe_kernel_get_code_size(opaque); } TRY_ALLOC (p->bin, cl_calloc(p->bin_sz, sizeof(char))); for (i = 0; i < p->ker_n; i ++) { const gbe_kernel opaque = gbe_program_get_kernel(p->opaque, i); size_t sz = gbe_kernel_get_code_size(opaque); memcpy(p->bin + copyed, gbe_kernel_get_code(opaque), sz); copyed += sz; } p->is_built = 1; error: 
return err; } LOCAL cl_kernel cl_program_create_kernel(cl_program p, const char *name, cl_int *errcode_ret) { cl_kernel from = NULL, to = NULL; cl_int err = CL_SUCCESS; uint32_t i = 0; /* Find the program first */ for (i = 0; i < p->ker_n; ++i) { assert(p->ker[i]); const char *ker_name = cl_kernel_get_name(p->ker[i]); if (strcmp(ker_name, name) == 0) { from = p->ker[i]; break; } } /* We were not able to find this named kernel */ if (UNLIKELY(from == NULL)) { err = CL_INVALID_KERNEL_NAME; goto error; } TRY_ALLOC(to, cl_kernel_dup(from)); exit: if (errcode_ret) *errcode_ret = err; return to; error: cl_kernel_delete(to); to = NULL; goto exit; } LOCAL cl_int cl_program_create_kernels_in_program(cl_program p, cl_kernel* ker) { int i = 0; if(ker == NULL) return CL_SUCCESS; for (i = 0; i < p->ker_n; ++i) { TRY_ALLOC_NO_ERR(ker[i], cl_kernel_dup(p->ker[i])); } return CL_SUCCESS; error: do { cl_kernel_delete(ker[i]); ker[i--] = NULL; } while(i > 0); return CL_OUT_OF_HOST_MEMORY; } Release_v0.3/src/cl_program.h000066400000000000000000000075541223142177000163030ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
 *
 * Author: Benjamin Segovia
 */

#ifndef __CL_PROGRAM_H__
#define __CL_PROGRAM_H__

#include "cl_internals.h"
#include "program.h"
#include "CL/cl.h"

/* NOTE(review): the operands of the next two #include directives were lost
   in extraction; restore them from the upstream file. */
#include
#include

// This is the structure output by the compiler
struct _gbe_program;

/* Where a program comes from (stored in _cl_program::source_type) */
enum {
  FROM_SOURCE = 0,
  FROM_LLVM = 1,
  FROM_BINARY = 2
};

/* This maps an OCL file containing some kernels */
struct _cl_program {
  DEFINE_ICD(dispatch)
  uint64_t magic;         /* To identify it as a program */
  volatile int ref_n;     /* We reference count this object */
  gbe_program opaque;     /* (Opaque) program as output by the compiler */
  cl_kernel *ker;         /* All kernels included by the OCL file */
  cl_program prev, next;  /* We chain the programs together */
  cl_context ctx;         /* Its parent context */
  char *bin;              /* The program copied verbatim */
  size_t bin_sz;          /* Its size in memory */
  char *source;           /* Program sources */
  char *binary;           /* Program binary. */
  size_t binary_sz;       /* The binary size. */
  uint32_t ker_n;         /* Number of declared kernels */
  uint32_t source_type:2; /* Built from binary, source or LLVM */
  uint32_t is_built:1;    /* Did we call clBuildProgram on it? */
  char *build_opts;       /* The build options for this program */
};

/* Create an empty program */
extern cl_program cl_program_new(cl_context);

/* Drop one reference to the program and destroy it when unreferenced */
extern void cl_program_delete(cl_program);

/* Add one more reference to the object (to defer its deletion) */
extern void cl_program_add_ref(cl_program);

/* Create a kernel for the OCL user */
extern cl_kernel cl_program_create_kernel(cl_program, const char*, cl_int*);

/* Create kernel objects for all kernel functions in the program. */
extern cl_int cl_program_create_kernels_in_program(cl_program, cl_kernel*);

/* Create a program from OCL source */
extern cl_program
cl_program_create_from_source(cl_context ctx,
                              cl_uint count,
                              const char **strings,
                              const size_t *lengths,
                              cl_int *errcode_ret);

/* Directly create a program from a blob */
extern cl_program
cl_program_create_from_binary(cl_context             context,
                              cl_uint                num_devices,
                              const cl_device_id *   devices,
                              const size_t *         lengths,
                              const unsigned char ** binaries,
                              cl_int *               binary_status,
                              cl_int *               errcode_ret);

/* Directly create a program from a LLVM source file */
extern cl_program
cl_program_create_from_llvm(cl_context             context,
                            cl_uint                num_devices,
                            const cl_device_id *   devices,
                            const char *           fileName,
                            cl_int *               errcode_ret);

/* Build the program as specified by OCL */
extern cl_int
cl_program_build(cl_program p, const char* options);

#endif /* __CL_PROGRAM_H__ */

Release_v0.3/src/cl_sampler.c000066400000000000000000000101411223142177000162540ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Benjamin Segovia */ #include "cl_context.h" #include "cl_sampler.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include uint32_t cl_to_clk(cl_bool normalized_coords, cl_addressing_mode address, cl_filter_mode filter) { int clk_address; int clk_filter; switch (address) { case CL_ADDRESS_NONE: clk_address = CLK_ADDRESS_NONE; break; case CL_ADDRESS_CLAMP: clk_address = CLK_ADDRESS_CLAMP; break; case CL_ADDRESS_CLAMP_TO_EDGE: clk_address = CLK_ADDRESS_CLAMP_TO_EDGE; break; case CL_ADDRESS_REPEAT: clk_address = CLK_ADDRESS_REPEAT; break; case CL_ADDRESS_MIRRORED_REPEAT: clk_address = CLK_ADDRESS_MIRRORED_REPEAT; break; default: assert(0); } switch(filter) { case CL_FILTER_NEAREST: clk_filter = CLK_FILTER_NEAREST; break; case CL_FILTER_LINEAR: clk_filter = CLK_FILTER_LINEAR; break; default: assert(0); } return (clk_address << __CLK_ADDRESS_BASE) | (normalized_coords << __CLK_NORMALIZED_BASE) | (clk_filter); } #define IS_SAMPLER_ARG(v) (v & __CLK_SAMPLER_ARG_KEY_BIT) #define SAMPLER_ARG_ID(v) ((v & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE) int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler) { int slot_id; for(slot_id = 0; slot_id < k->sampler_sz; slot_id++) { if (IS_SAMPLER_ARG(k->samplers[slot_id])) { if (SAMPLER_ARG_ID(k->samplers[slot_id]) == index) { k->samplers[slot_id] = (k->samplers[slot_id] & (~__CLK_SAMPLER_MASK)) | sampler->clkSamplerValue; return slot_id; } } } assert(0); } LOCAL cl_sampler cl_sampler_new(cl_context ctx, cl_bool normalized_coords, cl_addressing_mode address, cl_filter_mode filter, cl_int *errcode_ret) { cl_sampler sampler = NULL; cl_int err = CL_SUCCESS; /* Allocate and inialize the structure itself */ TRY_ALLOC (sampler, CALLOC(struct _cl_sampler)); SET_ICD(sampler->dispatch) sampler->ref_n = 1; sampler->magic = CL_MAGIC_SAMPLER_HEADER; sampler->normalized_coords = normalized_coords; sampler->address = address; sampler->filter = filter; /* Append the 
sampler in the context sampler list */ pthread_mutex_lock(&ctx->sampler_lock); sampler->next = ctx->samplers; if (ctx->samplers != NULL) ctx->samplers->prev = sampler; ctx->samplers = sampler; pthread_mutex_unlock(&ctx->sampler_lock); sampler->ctx = ctx; cl_context_add_ref(ctx); sampler->clkSamplerValue = cl_to_clk(normalized_coords, address, filter); exit: if (errcode_ret) *errcode_ret = err; return sampler; error: cl_sampler_delete(sampler); sampler = NULL; goto exit; } LOCAL void cl_sampler_delete(cl_sampler sampler) { if (UNLIKELY(sampler == NULL)) return; if (atomic_dec(&sampler->ref_n) > 1) return; assert(sampler->ctx); pthread_mutex_lock(&sampler->ctx->sampler_lock); if (sampler->prev) sampler->prev->next = sampler->next; if (sampler->next) sampler->next->prev = sampler->prev; if (sampler->prev == NULL && sampler->next == NULL) sampler->ctx->samplers = NULL; pthread_mutex_unlock(&sampler->ctx->sampler_lock); cl_context_delete(sampler->ctx); cl_free(sampler); } LOCAL void cl_sampler_add_ref(cl_sampler sampler) { assert(sampler); atomic_inc(&sampler->ref_n); } Release_v0.3/src/cl_sampler.h000066400000000000000000000040411223142177000162630ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_SAMPLER_H__ #define __CL_SAMPLER_H__ #include "CL/cl.h" #include "../backend/src/ocl_common_defines.h" #include /* How to access images */ struct _cl_sampler { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a sampler object */ volatile int ref_n; /* This object is reference counted */ cl_sampler prev, next; /* We chain the samplers in the allocator */ cl_context ctx; /* Context it belongs to */ cl_bool normalized_coords; /* Are coordinates normalized? */ cl_addressing_mode address;/* CLAMP / REPEAT and so on... */ cl_filter_mode filter; /* LINEAR / NEAREST mostly */ uint32_t clkSamplerValue; }; /* Create a new sampler object */ extern cl_sampler cl_sampler_new(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int *err); /* Unref the object and delete it if no more reference on it */ extern void cl_sampler_delete(cl_sampler); /* Add one more reference to this object */ extern void cl_sampler_add_ref(cl_sampler); /* set a sampler kernel argument */ int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler); #endif /* __CL_SAMPLER_H__ */ Release_v0.3/src/cl_utils.h000066400000000000000000000277251223142177000157760ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_UTILS_H__ #define __CL_UTILS_H__ /* INLINE is forceinline */ #define INLINE __attribute__((always_inline)) inline /* Branch hint */ #define LIKELY(x) __builtin_expect((x),1) #define UNLIKELY(x) __builtin_expect((x),0) /* Stringify macros */ #define JOIN(X, Y) _DO_JOIN(X, Y) #define _DO_JOIN(X, Y) _DO_JOIN2(X, Y) #define _DO_JOIN2(X, Y) X##Y /* Check compile time errors */ #define STATIC_ASSERT(value) \ struct JOIN(__,JOIN(__,__LINE__)) { \ int x[(value) ? 1 : -1]; \ } /* Throw errors */ #ifdef NDEBUG #define ERR(ERROR, ...) \ do { \ err = ERROR; \ goto error; \ } while (0) #else #define ERR(ERROR, ...) \ do { \ fprintf(stderr, "error in %s line %i\n", __FILE__, __LINE__); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ err = ERROR; \ goto error; \ } while (0) #endif #define DO_ALLOC_ERR \ do { \ ERR(CL_OUT_OF_HOST_MEMORY, "Out of memory"); \ } while (0) #define ERR_IF(COND, ERROR, ...) \ do { \ if (UNLIKELY(COND)) ERR (ERROR, __VA_ARGS__); \ } while (0) #define INVALID_VALUE_IF(COND) \ do { \ ERR_IF(COND, CL_INVALID_VALUE, "Invalid value"); \ } while (0) #define INVALID_DEVICE_IF(COND) \ do { \ ERR_IF(COND, CL_INVALID_DEVICE, "Invalid device"); \ } while (0) #define MAX(x0, x1) ((x0) > (x1) ? (x0) : (x1)) #define MIN(x0, x1) ((x0) < (x1) ? (x0) : (x1)) #define ALIGN(A, B) (((A) % (B)) ? (A) + (B) - ((A) % (B)) : (A)) #define DO_ALLOC_ERROR \ do { \ err = CL_OUT_OF_HOST_MEMORY; \ goto error; \ } while (0) #define FATAL(...) \ do { \ fprintf(stderr, "error: "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ assert(0); \ exit(-1); \ } while (0) #define FATAL_IF(COND, ...) 
\ do { \ if (UNLIKELY(COND)) FATAL(__VA_ARGS__); \ } while (0) #define NOT_IMPLEMENTED FATAL ("Not implemented") #define CHECK_CONTEXT(CTX) \ do { \ if (UNLIKELY(CTX == NULL)) { \ err = CL_INVALID_CONTEXT; \ goto error; \ } \ if (UNLIKELY(CTX->magic != CL_MAGIC_CONTEXT_HEADER)) { \ err = CL_INVALID_CONTEXT; \ goto error; \ } \ } while (0) #define CHECK_QUEUE(QUEUE) \ do { \ if (UNLIKELY(QUEUE == NULL)) { \ err = CL_INVALID_COMMAND_QUEUE; \ goto error; \ } \ if (UNLIKELY(QUEUE->magic != CL_MAGIC_QUEUE_HEADER)) { \ err = CL_INVALID_COMMAND_QUEUE; \ goto error; \ } \ } while (0) #define CHECK_MEM(MEM) \ do { \ if (UNLIKELY(MEM == NULL)) { \ err = CL_INVALID_MEM_OBJECT; \ goto error; \ } \ if (UNLIKELY(MEM->magic != CL_MAGIC_MEM_HEADER)) { \ err = CL_INVALID_MEM_OBJECT; \ goto error; \ } \ } while (0) #define CHECK_IMAGE(MEM, IMAGE) \ CHECK_MEM(MEM); \ do { \ if (UNLIKELY(!IS_IMAGE(MEM))) { \ err = CL_INVALID_MEM_OBJECT; \ goto error; \ } \ } while (0); \ struct _cl_mem_image *IMAGE; \ IMAGE = cl_mem_image(MEM); \ #define CHECK_EVENT(EVENT) \ do { \ if (UNLIKELY(EVENT == NULL)) { \ err = CL_INVALID_EVENT; \ goto error; \ } \ if (UNLIKELY(EVENT->magic != CL_MAGIC_EVENT_HEADER)) { \ err = CL_INVALID_EVENT; \ goto error; \ } \ } while (0) #define CHECK_SAMPLER(SAMPLER) \ do { \ if (UNLIKELY(SAMPLER == NULL)) { \ err = CL_INVALID_SAMPLER; \ goto error; \ } \ if (UNLIKELY(SAMPLER->magic != CL_MAGIC_SAMPLER_HEADER)) {\ err = CL_INVALID_SAMPLER; \ goto error; \ } \ } while (0) #define CHECK_KERNEL(KERNEL) \ do { \ if (UNLIKELY(KERNEL == NULL)) { \ err = CL_INVALID_KERNEL; \ goto error; \ } \ if (UNLIKELY(KERNEL->magic != CL_MAGIC_KERNEL_HEADER)) { \ err = CL_INVALID_KERNEL; \ goto error; \ } \ } while (0) #define CHECK_PROGRAM(PROGRAM) \ do { \ if (UNLIKELY(PROGRAM == NULL)) { \ err = CL_INVALID_PROGRAM; \ goto error; \ } \ if (UNLIKELY(PROGRAM->magic != CL_MAGIC_PROGRAM_HEADER)) {\ err = CL_INVALID_PROGRAM; \ goto error; \ } \ } while (0) #define ELEMENTS(x) 
(sizeof(x)/sizeof(*(x))) #define CALLOC_STRUCT(T) (struct T*) cl_calloc(1, sizeof(struct T)) #define CALLOC(T) (T*) cl_calloc(1, sizeof(T)) #define CALLOC_ARRAY(T, N) (T*) cl_calloc(N, sizeof(T)) #define MEMZERO(x) do { memset((x),0,sizeof(*(x))); } while (0) /* Run some code and catch errors */ #define TRY(fn,...) \ do { \ if (UNLIKELY((err = fn(__VA_ARGS__)) != CL_SUCCESS)) \ goto error; \ } while (0) #define TRY_NO_ERR(fn,...) \ do { \ if (UNLIKELY(fn(__VA_ARGS__) != CL_SUCCESS)) \ goto error; \ } while (0) #define TRY_ALLOC(dst, EXPR) \ do { \ if (UNLIKELY((dst = EXPR) == NULL)) \ DO_ALLOC_ERROR; \ } while (0) #define TRY_ALLOC_NO_ERR(dst, EXPR) \ do { \ if (UNLIKELY((dst = EXPR) == NULL)) \ goto error; \ } while (0) #define TRY_ALLOC_NO_RET(EXPR) \ do { \ if (UNLIKELY((EXPR) == NULL)) \ DO_ALLOC_ERROR; \ } while (0) /* Break Point Definitions */ #if !defined(NDEBUG) #define BREAK \ do { \ __asm__("int3"); \ } while(0) #define BREAK_IF(value) \ do { \ if (UNLIKELY(!(value))) BREAKPOINT(); \ } while(0) #else #define BREAKPOINT() do { } while(0) #define ASSERT(value) do { } while(0) #endif /* For all internal functions */ #define LOCAL __attribute__ ((visibility ("internal"))) /* Align a structure or a variable */ #define ALIGNED(X) __attribute__ ((aligned (X))) /* Number of DWORDS */ #define SIZEOF32(X) (sizeof(X) / sizeof(uint32_t)) /* Memory quantity */ #define KB 1024 #define MB (KB*KB) /* To help bitfield definitions */ #define BITFIELD_BIT(X) 1 #define BITFIELD_RANGE(X,Y) ((Y) - (X) + 1) /* 32 bits atomic variable */ typedef volatile int atomic_t; static INLINE int atomic_add(atomic_t *v, const int c) { register int i = c; __asm__ __volatile__("lock ; xaddl %0, %1;" : "+r"(i), "+m"(*v) : "m"(*v), "r"(i)); return i; } static INLINE int atomic_inc(atomic_t *v) { return atomic_add(v, 1); } static INLINE int atomic_dec(atomic_t *v) { return atomic_add(v, -1); } #endif /* __CL_UTILS_H__ */ 
Release_v0.3/src/intel/000077500000000000000000000000001223142177000151055ustar00rootroot00000000000000Release_v0.3/src/intel/intel_batchbuffer.c000066400000000000000000000121261223142177000207210ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ #include "intel/intel_batchbuffer.h" #include "intel/intel_driver.h" #include "cl_alloc.h" #include "cl_utils.h" #include #include #include LOCAL void intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz) { if (batch->buffer != NULL) { dri_bo_unreference(batch->buffer); batch->buffer = NULL; batch->last_bo = NULL; } batch->buffer = dri_bo_alloc(batch->intel->bufmgr, "batch buffer", sz, 64); assert(batch->buffer); dri_bo_map(batch->buffer, 1); batch->map = (uint8_t*) batch->buffer->virtual; batch->size = sz; batch->ptr = batch->map; batch->atomic = 0; batch->last_bo = batch->buffer; } LOCAL void intel_batchbuffer_init(intel_batchbuffer_t *batch, intel_driver_t *intel) { assert(intel); batch->intel = intel; } LOCAL void intel_batchbuffer_terminate(intel_batchbuffer_t *batch) { assert(batch->buffer); if (batch->map) { dri_bo_unmap(batch->buffer); batch->map = NULL; } dri_bo_unreference(batch->buffer); batch->buffer = NULL; } LOCAL void intel_batchbuffer_flush(intel_batchbuffer_t *batch) { uint32_t used = batch->ptr - batch->map; int is_locked = batch->intel->locked; if (used == 0) return; if ((used & 4) == 0) { *(uint32_t*) batch->ptr = 0; batch->ptr += 4; } *(uint32_t*)batch->ptr = MI_BATCH_BUFFER_END; batch->ptr += 4; dri_bo_unmap(batch->buffer); used = batch->ptr - batch->map; if (!is_locked) intel_driver_lock_hardware(batch->intel); dri_bo_exec(batch->buffer, used, 0, 0, 0); if (!is_locked) intel_driver_unlock_hardware(batch->intel); // Release the buffer intel_batchbuffer_terminate(batch); } LOCAL void intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch, dri_bo *bo, uint32_t read_domains, uint32_t 
write_domains, uint32_t delta) { assert(batch->ptr - batch->map < batch->size); dri_bo_emit_reloc(batch->buffer, read_domains, write_domains, delta, batch->ptr - batch->map, bo); intel_batchbuffer_emit_dword(batch, bo->offset + delta); } LOCAL void intel_batchbuffer_emit_mi_flush(intel_batchbuffer_t *batch) { intel_batchbuffer_require_space(batch, 4); intel_batchbuffer_emit_dword(batch, MI_FLUSH | STATE_INSTRUCTION_CACHE_INVALIDATE); } LOCAL intel_batchbuffer_t* intel_batchbuffer_new(intel_driver_t *intel) { intel_batchbuffer_t *batch = NULL; assert(intel); TRY_ALLOC_NO_ERR (batch, CALLOC(intel_batchbuffer_t)); intel_batchbuffer_init(batch, intel); exit: return batch; error: intel_batchbuffer_delete(batch); batch = NULL; goto exit; } LOCAL void intel_batchbuffer_delete(intel_batchbuffer_t *batch) { if (batch == NULL) return; if(batch->buffer) intel_batchbuffer_terminate(batch); cl_free(batch); } Release_v0.3/src/intel/intel_batchbuffer.h000066400000000000000000000121561223142177000207310ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ #ifndef _INTEL_BATCHBUFFER_H_ #define _INTEL_BATCHBUFFER_H_ #include "intel_defines.h" #include "cl_utils.h" #include #include #include #include #include #include #include #define BEGIN_BATCH(b, n) do { \ intel_batchbuffer_require_space(b, (n) * 4); \ } while (0) #define OUT_BATCH(b, d) do { \ intel_batchbuffer_emit_dword(b, d); \ } while (0) #define OUT_RELOC(b, bo, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ intel_batchbuffer_emit_reloc(b, bo, read_domains, write_domain, delta); \ } while (0) #define ADVANCE_BATCH(b) do { } while (0) struct intel_driver; typedef struct intel_batchbuffer { struct intel_driver *intel; drm_intel_bo *buffer; /** Last bo submitted to the hardware. used for clFinish. 
*/ drm_intel_bo *last_bo; uint32_t size; uint8_t *map; uint8_t *ptr; int atomic; } intel_batchbuffer_t; extern intel_batchbuffer_t* intel_batchbuffer_new(struct intel_driver*); extern void intel_batchbuffer_delete(intel_batchbuffer_t*); extern void intel_batchbuffer_emit_reloc(intel_batchbuffer_t*, drm_intel_bo*, uint32_t read_domains, uint32_t write_domains, uint32_t delta); extern void intel_batchbuffer_emit_mi_flush(intel_batchbuffer_t*); extern void intel_batchbuffer_init(intel_batchbuffer_t*, struct intel_driver*); extern void intel_batchbuffer_terminate(intel_batchbuffer_t*); extern void intel_batchbuffer_flush(intel_batchbuffer_t*); extern void intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz); static INLINE uint32_t intel_batchbuffer_space(const intel_batchbuffer_t *batch) { assert(batch->ptr); return batch->size - (batch->ptr - batch->map); } static INLINE void intel_batchbuffer_emit_dword(intel_batchbuffer_t *batch, uint32_t x) { assert(intel_batchbuffer_space(batch) >= 4); *(uint32_t*)batch->ptr = x; batch->ptr += 4; } static INLINE void intel_batchbuffer_require_space(intel_batchbuffer_t *batch, uint32_t size) { assert(size < batch->size - 8); if (intel_batchbuffer_space(batch) < size) intel_batchbuffer_space(batch); } static INLINE uint8_t* intel_batchbuffer_alloc_space(intel_batchbuffer_t *batch, uint32_t size) { assert(intel_batchbuffer_space(batch) >= size); uint8_t *space_ptr = batch->ptr; batch->ptr += size; return space_ptr; } static INLINE void intel_batchbuffer_start_atomic(intel_batchbuffer_t *batch, uint32_t size) { assert(!batch->atomic); intel_batchbuffer_require_space(batch, size); batch->atomic = 1; } static INLINE void intel_batchbuffer_end_atomic(intel_batchbuffer_t *batch) { assert(batch->atomic); batch->atomic = 0; } #endif /* _INTEL_BATCHBUFFER_H_ */ Release_v0.3/src/intel/intel_defines.h000066400000000000000000000411721223142177000200730ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is 
free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell
  */
#ifndef __GENX_DEFINES_H__
#define __GENX_DEFINES_H__

/* Build a command header dword: the constant 3 at bit 29, PIPELINE at
 * bit 27, OP at bit 24 and SUB_OP at bit 16. */
#define CMD(PIPELINE,OP,SUB_OP) ((3 << 29) | \
                                ((PIPELINE) << 27) | \
                                ((OP) << 24) | \
                                ((SUB_OP) << 16))

/* Command opcodes */
#define CMD_URB_FENCE                           CMD(0, 0, 0)
#define CMD_CS_URB_STATE                        CMD(0, 0, 1)
#define CMD_CONSTANT_BUFFER                     CMD(0, 0, 2)
#define CMD_STATE_PREFETCH                      CMD(0, 0, 3)
#define CMD_MEDIA_GATEWAY_STATE                 CMD(2, 0, 3)
#define CMD_MEDIA_STATE_FLUSH                   CMD(2, 0, 4)
#define CMD_GPGPU_WALKER                        CMD(2, 1, 5)
#define CMD_PIPE_CONTROL                        CMD(3, 2, 0)

/* Not built with CMD(): a raw opcode shifted to bit 23 */
#define CMD_LOAD_REGISTER_IMM                   (0x22 << 23)

#define CMD_STATE_BASE_ADDRESS                  CMD(0, 1, 1)
#define CMD_STATE_SIP                           CMD(0, 1, 2)
#define CMD_PIPELINE_SELECT                     CMD(1, 1, 4)
#define CMD_SAMPLER_PALETTE_LOAD                CMD(3, 1, 2)

#define CMD_MEDIA_STATE_POINTERS                CMD(2, 0, 0)
#define CMD_MEDIA                               CMD(2, 1, 0)
#define CMD_MEDIA_EX                            CMD(2, 1, 1)

#define CMD_PIPELINED_POINTERS                  CMD(3, 0, 0)
#define CMD_BINDING_TABLE_POINTERS              CMD(3, 0, 1)
#define CMD_VERTEX_BUFFERS                      CMD(3, 0, 8)
#define CMD_VERTEX_ELEMENTS                     CMD(3, 0, 9)
#define CMD_DRAWING_RECTANGLE                   CMD(3, 1, 0)
#define CMD_CONSTANT_COLOR                      CMD(3, 1, 1)
#define CMD_3DPRIMITIVE                         CMD(3, 3, 0)

#define BASE_ADDRESS_MODIFY             (1 << 0)

#define PIPELINE_SELECT_3D              0
#define PIPELINE_SELECT_MEDIA           1

/* URB fence: reallocation flags and per-unit fence shifts */
#define UF0_CS_REALLOC                  (1 << 13)
#define UF0_VFE_REALLOC                 (1 << 12)
#define UF0_SF_REALLOC                  (1 << 11)
#define UF0_CLIP_REALLOC                (1 << 10)
#define UF0_GS_REALLOC                  (1 << 9)
#define UF0_VS_REALLOC                  (1 << 8)
#define UF1_CLIP_FENCE_SHIFT            20
#define UF1_GS_FENCE_SHIFT              10
#define UF1_VS_FENCE_SHIFT              0
#define UF2_CS_FENCE_SHIFT              20
#define UF2_VFE_FENCE_SHIFT             10
#define UF2_SF_FENCE_SHIFT              0

#define FLOATING_POINT_IEEE_754        0
#define FLOATING_POINT_NON_IEEE_754    1

/* Surface types */
#define I965_SURFACE_1D      0
#define I965_SURFACE_2D      1
#define I965_SURFACE_3D      2
#define I965_SURFACE_CUBE    3
#define I965_SURFACE_BUFFER  4
#define I965_SURFACE_NULL    7

/* Surface formats */
#define I965_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000
#define I965_SURFACEFORMAT_R32G32B32A32_SINT              0x001
#define I965_SURFACEFORMAT_R32G32B32A32_UINT              0x002
#define I965_SURFACEFORMAT_R32G32B32A32_UNORM             0x003
#define I965_SURFACEFORMAT_R32G32B32A32_SNORM             0x004
#define I965_SURFACEFORMAT_R64G64_FLOAT                   0x005
#define I965_SURFACEFORMAT_R32G32B32X32_FLOAT             0x006
#define I965_SURFACEFORMAT_R32G32B32A32_SSCALED           0x007
#define I965_SURFACEFORMAT_R32G32B32A32_USCALED           0x008
#define I965_SURFACEFORMAT_R32G32B32_FLOAT                0x040
#define I965_SURFACEFORMAT_R32G32B32_SINT                 0x041
#define I965_SURFACEFORMAT_R32G32B32_UINT                 0x042
#define I965_SURFACEFORMAT_R32G32B32_UNORM                0x043
#define I965_SURFACEFORMAT_R32G32B32_SNORM                0x044
#define I965_SURFACEFORMAT_R32G32B32_SSCALED              0x045
#define I965_SURFACEFORMAT_R32G32B32_USCALED              0x046
#define I965_SURFACEFORMAT_R16G16B16A16_UNORM             0x080
#define I965_SURFACEFORMAT_R16G16B16A16_SNORM             0x081
#define I965_SURFACEFORMAT_R16G16B16A16_SINT              0x082
#define I965_SURFACEFORMAT_R16G16B16A16_UINT              0x083
#define I965_SURFACEFORMAT_R16G16B16A16_FLOAT             0x084
#define I965_SURFACEFORMAT_R32G32_FLOAT                   0x085
#define I965_SURFACEFORMAT_R32G32_SINT                    0x086
#define I965_SURFACEFORMAT_R32G32_UINT                    0x087
#define I965_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS       0x088
#define I965_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT        0x089
#define I965_SURFACEFORMAT_L32A32_FLOAT                   0x08A
#define I965_SURFACEFORMAT_R32G32_UNORM                   0x08B
#define I965_SURFACEFORMAT_R32G32_SNORM                   0x08C
#define I965_SURFACEFORMAT_R64_FLOAT                      0x08D
#define I965_SURFACEFORMAT_R16G16B16X16_UNORM             0x08E
#define I965_SURFACEFORMAT_R16G16B16X16_FLOAT             0x08F
#define I965_SURFACEFORMAT_A32X32_FLOAT                   0x090
#define I965_SURFACEFORMAT_L32X32_FLOAT                   0x091
#define I965_SURFACEFORMAT_I32X32_FLOAT                   0x092
#define I965_SURFACEFORMAT_R16G16B16A16_SSCALED           0x093
#define I965_SURFACEFORMAT_R16G16B16A16_USCALED           0x094
#define I965_SURFACEFORMAT_R32G32_SSCALED                 0x095
#define I965_SURFACEFORMAT_R32G32_USCALED                 0x096
#define I965_SURFACEFORMAT_B8G8R8A8_UNORM                 0x0C0
#define I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB            0x0C1
#define I965_SURFACEFORMAT_R10G10B10A2_UNORM              0x0C2
#define I965_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB         0x0C3
#define I965_SURFACEFORMAT_R10G10B10A2_UINT               0x0C4
#define I965_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM       0x0C5
#define I965_SURFACEFORMAT_R8G8B8A8_UNORM                 0x0C7
#define I965_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB            0x0C8
#define I965_SURFACEFORMAT_R8G8B8A8_SNORM                 0x0C9
#define I965_SURFACEFORMAT_R8G8B8A8_SINT                  0x0CA
#define I965_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB
#define I965_SURFACEFORMAT_R16G16_UNORM                   0x0CC
#define I965_SURFACEFORMAT_R16G16_SNORM                   0x0CD
#define I965_SURFACEFORMAT_R16G16_SINT                    0x0CE
#define I965_SURFACEFORMAT_R16G16_UINT                    0x0CF
#define I965_SURFACEFORMAT_R16G16_FLOAT                   0x0D0
#define I965_SURFACEFORMAT_B10G10R10A2_UNORM              0x0D1
#define I965_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB         0x0D2
#define I965_SURFACEFORMAT_R11G11B10_FLOAT                0x0D3
#define I965_SURFACEFORMAT_R32_SINT                       0x0D6
#define I965_SURFACEFORMAT_R32_UINT                       0x0D7
#define I965_SURFACEFORMAT_R32_FLOAT                      0x0D8
#define I965_SURFACEFORMAT_R24_UNORM_X8_TYPELESS          0x0D9
#define I965_SURFACEFORMAT_X24_TYPELESS_G8_UINT           0x0DA
#define I965_SURFACEFORMAT_L16A16_UNORM                   0x0DF
#define I965_SURFACEFORMAT_I24X8_UNORM                    0x0E0
#define I965_SURFACEFORMAT_L24X8_UNORM                    0x0E1
#define I965_SURFACEFORMAT_A24X8_UNORM                    0x0E2
#define I965_SURFACEFORMAT_I32_FLOAT                      0x0E3
#define I965_SURFACEFORMAT_L32_FLOAT                      0x0E4
#define I965_SURFACEFORMAT_A32_FLOAT                      0x0E5
#define I965_SURFACEFORMAT_B8G8R8X8_UNORM                 0x0E9
#define I965_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB            0x0EA
#define I965_SURFACEFORMAT_R8G8B8X8_UNORM                 0x0EB
#define I965_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB            0x0EC
#define I965_SURFACEFORMAT_R9G9B9E5_SHAREDEXP             0x0ED
#define I965_SURFACEFORMAT_B10G10R10X2_UNORM              0x0EE
#define I965_SURFACEFORMAT_L16A16_FLOAT                   0x0F0
#define I965_SURFACEFORMAT_R32_UNORM                      0x0F1
#define I965_SURFACEFORMAT_R32_SNORM                      0x0F2
#define I965_SURFACEFORMAT_R10G10B10X2_USCALED            0x0F3
#define I965_SURFACEFORMAT_R8G8B8A8_SSCALED               0x0F4
#define I965_SURFACEFORMAT_R8G8B8A8_USCALED               0x0F5
#define I965_SURFACEFORMAT_R16G16_SSCALED                 0x0F6
#define I965_SURFACEFORMAT_R16G16_USCALED                 0x0F7
#define I965_SURFACEFORMAT_R32_SSCALED                    0x0F8
#define I965_SURFACEFORMAT_R32_USCALED                    0x0F9
#define I965_SURFACEFORMAT_B5G6R5_UNORM                   0x100
#define I965_SURFACEFORMAT_B5G6R5_UNORM_SRGB              0x101
#define I965_SURFACEFORMAT_B5G5R5A1_UNORM                 0x102
#define I965_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB            0x103
#define I965_SURFACEFORMAT_B4G4R4A4_UNORM                 0x104
#define I965_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB            0x105
#define I965_SURFACEFORMAT_R8G8_UNORM                     0x106
#define I965_SURFACEFORMAT_R8G8_SNORM                     0x107
#define I965_SURFACEFORMAT_R8G8_SINT                      0x108
#define I965_SURFACEFORMAT_R8G8_UINT                      0x109
#define I965_SURFACEFORMAT_R16_UNORM                      0x10A
#define I965_SURFACEFORMAT_R16_SNORM                      0x10B
#define I965_SURFACEFORMAT_R16_SINT                       0x10C
#define I965_SURFACEFORMAT_R16_UINT                       0x10D
#define I965_SURFACEFORMAT_R16_FLOAT                      0x10E
#define I965_SURFACEFORMAT_I16_UNORM                      0x111
#define I965_SURFACEFORMAT_L16_UNORM                      0x112
#define I965_SURFACEFORMAT_A16_UNORM                      0x113
#define I965_SURFACEFORMAT_L8A8_UNORM                     0x114
#define I965_SURFACEFORMAT_I16_FLOAT                      0x115
#define I965_SURFACEFORMAT_L16_FLOAT                      0x116
#define I965_SURFACEFORMAT_A16_FLOAT                      0x117
#define I965_SURFACEFORMAT_R5G5_SNORM_B6_UNORM            0x119
#define I965_SURFACEFORMAT_B5G5R5X1_UNORM                 0x11A
#define I965_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB            0x11B
#define I965_SURFACEFORMAT_R8G8_SSCALED                   0x11C
#define I965_SURFACEFORMAT_R8G8_USCALED                   0x11D
#define I965_SURFACEFORMAT_R16_SSCALED                    0x11E
#define I965_SURFACEFORMAT_R16_USCALED                    0x11F
#define I965_SURFACEFORMAT_R8_UNORM                       0x140
#define I965_SURFACEFORMAT_R8_SNORM                       0x141
#define I965_SURFACEFORMAT_R8_SINT                        0x142
#define I965_SURFACEFORMAT_R8_UINT                        0x143
#define I965_SURFACEFORMAT_A8_UNORM                       0x144
#define I965_SURFACEFORMAT_I8_UNORM                       0x145
#define I965_SURFACEFORMAT_L8_UNORM                       0x146
#define I965_SURFACEFORMAT_P4A4_UNORM                     0x147
#define I965_SURFACEFORMAT_A4P4_UNORM                     0x148
#define I965_SURFACEFORMAT_R8_SSCALED                     0x149
#define I965_SURFACEFORMAT_R8_USCALED                     0x14A
#define I965_SURFACEFORMAT_R1_UINT                        0x181
#define I965_SURFACEFORMAT_YCRCB_NORMAL                   0x182
#define I965_SURFACEFORMAT_YCRCB_SWAPUVY                  0x183
#define I965_SURFACEFORMAT_BC1_UNORM                      0x186
#define I965_SURFACEFORMAT_BC2_UNORM                      0x187
#define I965_SURFACEFORMAT_BC3_UNORM                      0x188
#define I965_SURFACEFORMAT_BC4_UNORM                      0x189
#define I965_SURFACEFORMAT_BC5_UNORM                      0x18A
#define I965_SURFACEFORMAT_BC1_UNORM_SRGB                 0x18B
#define I965_SURFACEFORMAT_BC2_UNORM_SRGB                 0x18C
#define I965_SURFACEFORMAT_BC3_UNORM_SRGB                 0x18D
#define I965_SURFACEFORMAT_MONO8                          0x18E
#define I965_SURFACEFORMAT_YCRCB_SWAPUV                   0x18F
#define I965_SURFACEFORMAT_YCRCB_SWAPY                    0x190
#define I965_SURFACEFORMAT_DXT1_RGB                       0x191
#define I965_SURFACEFORMAT_FXT1                           0x192
#define I965_SURFACEFORMAT_R8G8B8_UNORM                   0x193
#define I965_SURFACEFORMAT_R8G8B8_SNORM                   0x194
#define I965_SURFACEFORMAT_R8G8B8_SSCALED                 0x195
#define I965_SURFACEFORMAT_R8G8B8_USCALED                 0x196
#define I965_SURFACEFORMAT_R64G64B64A64_FLOAT             0x197
#define I965_SURFACEFORMAT_R64G64B64_FLOAT                0x198
#define I965_SURFACEFORMAT_BC4_SNORM                      0x199
#define I965_SURFACEFORMAT_BC5_SNORM                      0x19A
#define I965_SURFACEFORMAT_R16G16B16_UNORM                0x19C
#define I965_SURFACEFORMAT_R16G16B16_SNORM                0x19D
#define I965_SURFACEFORMAT_R16G16B16_SSCALED              0x19E
#define I965_SURFACEFORMAT_R16G16B16_USCALED              0x19F
#define I965_SURFACEFORMAT_RAW                            0x1FF

/* Sampler filtering and addressing (I965_* names; the GEN_* block further
 * down repeats the same values) */
#define I965_MAPFILTER_NEAREST        0x0
#define I965_MAPFILTER_LINEAR         0x1
#define I965_MAPFILTER_ANISOTROPIC    0x2

#define I965_MIPFILTER_NONE        0
#define I965_MIPFILTER_NEAREST     1
#define I965_MIPFILTER_LINEAR      3

#define I965_TEXCOORDMODE_WRAP            0
#define I965_TEXCOORDMODE_MIRROR          1
#define I965_TEXCOORDMODE_CLAMP           2
#define I965_TEXCOORDMODE_CUBE            3
#define I965_TEXCOORDMODE_CLAMP_BORDER    4
#define I965_TEXCOORDMODE_MIRROR_ONCE     5

#define I965_SURFACERETURNFORMAT_FLOAT32  0
#define I965_SURFACERETURNFORMAT_S1       1

#define I965_TILEWALK_XMAJOR                 0
#define I965_TILEWALK_YMAJOR                 1

/* 1024 when IS_IGDNG(device_id), else 384 when IS_G4X(device_id), else 256.
 * `intel` is expanded unparenthesized, so pass a plain identifier. */
#define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 :    \
                                 IS_G4X(intel->device_id) ? 384 : 256)

// L3 cache stuff
#define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET          (0xB020)
#define GEN7_L3_CNTL_REG3_ADDRESS_OFFSET          (0xB024)

// To issue pipe controls (reset L3 / SLM or stall)
#define GEN7_PIPE_CONTROL_MEDIA 0x2
#define GEN7_PIPE_CONTROL_3D 0x3
#define GEN7_PIPE_CONTROL_INSTRUCTION_GFX 0x3
#define GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL 0x2
#define GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL 0x0
#define GEN7_PIPE_CONTROL_WRITE_TIMESTAMP        (3 << 14)
#define GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE       (1 << 2)

/* Sampler filtering and addressing (GEN_* names) */
#define GEN_MAPFILTER_NEAREST        0x0
#define GEN_MAPFILTER_LINEAR         0x1
#define GEN_MAPFILTER_ANISOTROPIC    0x2

#define GEN_MIPFILTER_NONE        0
#define GEN_MIPFILTER_NEAREST     1
#define GEN_MIPFILTER_LINEAR      3

/* One bit per coordinate (U, V, R) and filter direction (MAG, MIN) */
#define GEN_ADDRESS_ROUNDING_ENABLE_U_MAG	0x20
#define GEN_ADDRESS_ROUNDING_ENABLE_U_MIN	0x10
#define GEN_ADDRESS_ROUNDING_ENABLE_V_MAG	0x08
#define GEN_ADDRESS_ROUNDING_ENABLE_V_MIN	0x04
#define GEN_ADDRESS_ROUNDING_ENABLE_R_MAG	0x02
#define GEN_ADDRESS_ROUNDING_ENABLE_R_MIN	0x01

#define GEN_TEXCOORDMODE_WRAP            0
#define GEN_TEXCOORDMODE_MIRROR          1
#define GEN_TEXCOORDMODE_CLAMP           2
#define GEN_TEXCOORDMODE_CUBE            3
#define GEN_TEXCOORDMODE_CLAMP_BORDER    4
#define GEN_TEXCOORDMODE_MIRROR_ONCE     5

#endif /* __GENX_DEFINES_H__ */

Release_v0.3/src/intel/intel_dri_resource_sharing.c000066400000000000000000000157251223142177000226560ustar00rootroot00000000000000/************************************************************************** * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* **************************************************************************/ #define HAVE_PTHREAD 1 #include #include #include "main/context.h" #include "main/renderbuffer.h" #include "main/texobj.h" #include #include #include #include #include #include #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_context.h" #include "intel_dri_resource_sharing.h" #include "intel_dri_resource_sharing_int.h" #include /** * Sets up a DRIImage structure to point to our shared image in a region */ static bool intel_setup_cl_region_from_mipmap_tree(void *driver, struct intel_context *intel, struct intel_mipmap_tree *mt, GLuint level, GLuint zoffset, struct _intel_dri_share_image_region *region) { unsigned int draw_x, draw_y; uint32_t mask_x, mask_y; struct intel_region *null_region = (struct intel_region *)NULL; intel_miptree_check_level_layer(mt, level, zoffset); _intel_region_get_tile_masks(mt->region, &mask_x, &mask_y, false); _intel_miptree_get_image_offset(mt, level, zoffset, &draw_x, &draw_y); region->w = mt->level[level].width; region->h = mt->level[level].height; region->tile_x = draw_x & mask_x; region->tile_y = draw_y & mask_y; region->tiling = mt->region->tiling; /* XXX hard code to 1 right now. 
*/ region->depth = 1; region->row_pitch = mt->region->pitch; region->offset = _intel_region_get_aligned_offset(mt->region, draw_x & ~mask_x, draw_y & ~mask_y, false); if (!_intel_region_flink(mt->region, ®ion->name)) return false; _intel_region_reference(&null_region, mt->region); return true; } typedef void _mesa_test_texobj_completeness_t( const struct gl_context *ctx, struct gl_texture_object *t ); _mesa_test_texobj_completeness_t *__mesa_test_texobj_completeness; typedef struct gl_texture_object * _mesa_lookup_texture_t( const struct gl_context *ctx, GLuint id); _mesa_lookup_texture_t *__mesa_lookup_texture; static struct gl_texture_object * intel_get_gl_obj_from_texture(void *driver, struct intel_context *intel, GLenum target, GLint level, GLuint texture, GLuint face) { struct gl_texture_object *obj; __mesa_lookup_texture = dlsym(driver, "_mesa_lookup_texture"); obj = __mesa_lookup_texture(&intel->ctx, texture); if (!obj || obj->Target != target) { return NULL; } __mesa_test_texobj_completeness = dlsym(driver, "_mesa_test_texobj_completeness"); __mesa_test_texobj_completeness(&intel->ctx, obj); if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) { return NULL; } if (level < obj->BaseLevel || level > obj->_MaxLevel) { return NULL; } return obj; } static GLenum get_cl_gl_format(gl_format format) { switch (format) { case MESA_FORMAT_RGBA8888: return GL_RGBA; case MESA_FORMAT_ARGB8888: return GL_BGRA; default: return GL_BGRA; } } static bool intelAcquireTexture(void *driver, __DRIcontext *context, GLenum target, GLint level, GLuint texture, void *user_data) { struct _intel_dri_share_image_region *region = intel_dri_share_image_region(user_data); struct intel_context *intel = context->driverPrivate; struct gl_texture_object *obj; struct intel_texture_object *iobj; /* XXX Always be face 0? 
*/ GLuint face = 0; obj = intel_get_gl_obj_from_texture(driver, intel, target, level, texture, face); if (obj == NULL) return false; iobj = intel_texture_object(obj); region->gl_format = get_cl_gl_format(obj->Image[face][level]->TexFormat); return intel_setup_cl_region_from_mipmap_tree(driver, intel, iobj->mt, level, 0, region); } static bool intelReleaseTexture(void *driver, __DRIcontext *context, GLenum target, GLint level, GLuint texture) { struct intel_context *intel = context->driverPrivate; struct gl_texture_object *obj; struct intel_texture_object *iobj; /* XXX Always be face 0? */ GLuint face = 0; obj = intel_get_gl_obj_from_texture(driver, intel, target, level, texture, face); if (obj == NULL) return false; iobj = intel_texture_object(obj); _intel_region_release(&iobj->mt->region); return true; } static bool intelAcquireBufferObj(void *driver, __DRIcontext *driContextPriv, GLuint bufobj, void *user_data) { return false; } static bool intelReleaseBufferObj(void *driver, __DRIcontext *driContextPriv, GLuint bufobj) { return false; } static bool intelAcquireRenderBuffer(void *driver, __DRIcontext *driContextPriv, GLuint bufobj, void *user_data) { return false; } static bool intelReleaseRenderBuffer(void *driver, __DRIcontext *driContextPriv, GLuint bufobj) { return false; } #include "cl_driver.h" void intel_set_cl_gl_callbacks(void) { cl_gl_acquire_texture = (cl_gl_acquire_texture_cb*)intelAcquireTexture; cl_gl_release_texture = (cl_gl_release_texture_cb*)intelReleaseTexture; cl_gl_acquire_buffer_object = (cl_gl_acquire_buffer_object_cb*)intelAcquireBufferObj; cl_gl_release_buffer_object = (cl_gl_release_buffer_object_cb*)intelReleaseBufferObj; cl_gl_acquire_render_buffer = (cl_gl_acquire_render_buffer_cb*)intelAcquireRenderBuffer; cl_gl_release_render_buffer = (cl_gl_release_render_buffer_cb*)intelReleaseRenderBuffer; } Release_v0.3/src/intel/intel_dri_resource_sharing.h000066400000000000000000000014511223142177000226520ustar00rootroot00000000000000#ifndef 
__INTEL_DRI_RESOURCE_SHARING_H__ #define __INTEL_DRI_RESOURCE_SHARING_H__ struct _intel_dri_share_image_region { unsigned int name; size_t w; size_t h; size_t depth; size_t pitch; int tiling; size_t offset; size_t tile_x; size_t tile_y; unsigned int gl_format; size_t row_pitch, slice_pitch; }; struct _intel_dri_share_buffer_object { unsigned int name; size_t sz; size_t offset; }; inline static struct _intel_dri_share_image_region * intel_dri_share_image_region(void *user_data) { return (struct _intel_dri_share_image_region *)user_data; } inline static struct _intel_dri_share_buffer_object * intel_dri_share_buffer_object(void *user_data) { return (struct _intel_dri_share_buffer_object *)user_data; } extern void intel_set_cl_gl_callbacks(void); #endif Release_v0.3/src/intel/intel_dri_resource_sharing_int.h000066400000000000000000000075151223142177000235330ustar00rootroot00000000000000/***************************************************************** * The following functions are copied from i965 driver, commit * id 292368570a13501dfa95b1b0dd70966caf6ffc6b. Need to keep consistant * with the dri driver installed on current system. *****************************************************************/ static bool _intel_region_flink(struct intel_region *region, uint32_t *name) { if (region->name == 0) { if (drm_intel_bo_flink(region->bo, ®ion->name)) return false; } *name = region->name; return true; } #define _DBG(...) static void _intel_region_release(struct intel_region **region_handle) { struct intel_region *region = *region_handle; if (region == NULL) { _DBG("%s NULL\n", __FUNCTION__); return; } _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); ASSERT(region->refcount > 0); region->refcount--; if (region->refcount == 0) { drm_intel_bo_unreference(region->bo); free(region); } *region_handle = NULL; } static void _intel_region_reference(struct intel_region **dst, struct intel_region *src) { _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__, *dst, *dst ? 
(*dst)->refcount : 0, src, src ? src->refcount : 0); if (src != *dst) { if (*dst) _intel_region_release(dst); if (src) src->refcount++; *dst = src; } } /** * This function computes masks that may be used to select the bits of the X * and Y coordinates that indicate the offset within a tile. If the region is * untiled, the masks are set to 0. */ static void _intel_region_get_tile_masks(struct intel_region *region, uint32_t *mask_x, uint32_t *mask_y, bool map_stencil_as_y_tiled) { int cpp = region->cpp; uint32_t tiling = region->tiling; if (map_stencil_as_y_tiled) tiling = I915_TILING_Y; switch (tiling) { default: assert(false); case I915_TILING_NONE: *mask_x = *mask_y = 0; break; case I915_TILING_X: *mask_x = 512 / cpp - 1; *mask_y = 7; break; case I915_TILING_Y: *mask_x = 128 / cpp - 1; *mask_y = 31; break; } } /** * Compute the offset (in bytes) from the start of the region to the given x * and y coordinate. For tiled regions, caller must ensure that x and y are * multiples of the tile size. */ static uint32_t _intel_region_get_aligned_offset(struct intel_region *region, uint32_t x, uint32_t y, bool map_stencil_as_y_tiled) { int cpp = region->cpp; uint32_t pitch = region->pitch; uint32_t tiling = region->tiling; if (map_stencil_as_y_tiled) { tiling = I915_TILING_Y; /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile * gets transformed into a 32-high Y-tile. Accordingly, the pitch of * the resulting region is twice the pitch of the original region, since * each row in the Y-tiled view corresponds to two rows in the actual * W-tiled surface. So we need to correct the pitch before computing * the offsets. 
*/ pitch *= 2; } switch (tiling) { default: assert(false); case I915_TILING_NONE: return y * pitch + x * cpp; case I915_TILING_X: assert((x % (512 / cpp)) == 0); assert((y % 8) == 0); return y * pitch + x / (512 / cpp) * 4096; case I915_TILING_Y: assert((x % (128 / cpp)) == 0); assert((y % 32) == 0); return y * pitch + x / (128 / cpp) * 4096; } } static void _intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, GLuint level, GLuint slice, GLuint *x, GLuint *y) { assert(slice < mt->level[level].depth); *x = mt->level[level].slice[slice].x_offset; *y = mt->level[level].slice[slice].y_offset; } Release_v0.3/src/intel/intel_driver.c000066400000000000000000000464171223142177000177530ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /* * Copyright 2009 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*
 * Authors:
 *    Xiang Haihao
 *    Zou Nan hai
 *
 */

#if defined(HAS_EGL)
#include "GL/gl.h"
#include "EGL/egl.h"
#include "x11/mesa_egl_extension.h"
#endif

#include "intel_driver.h"
#include "intel_gpgpu.h"
#include "intel_batchbuffer.h"
#include "intel_bufmgr.h"
#include
#include "x11/dricommon.h"
#include "cl_mem.h"

#include
#include
#include
#include
#include
#include
#include
#include

#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_context.h"
#include "cl_driver.h"
#include "cl_device_id.h"
#include "cl_platform_id.h"

/* Block every signal except SIGFPE/SIGILL/SIGSEGV/SIGBUS/SIGKILL for the
 * calling thread and save the previous mask in DRIVER->sa_mask. */
#define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
  sigset_t bl_mask;                                           \
  sigfillset(&bl_mask);                                       \
  sigdelset(&bl_mask, SIGFPE);                                \
  sigdelset(&bl_mask, SIGILL);                                \
  sigdelset(&bl_mask, SIGSEGV);                               \
  sigdelset(&bl_mask, SIGBUS);                                \
  sigdelset(&bl_mask, SIGKILL);                               \
  pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
} while (0)

/* Restore the signal mask saved by SET_BLOCKED_SIGSET. */
#define RESTORE_BLOCKED_SIGSET(DRIVER) do {                   \
  pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL);     \
} while (0)

/* Block signals, then take DRIVER->ctxmutex. */
#define PPTHREAD_MUTEX_LOCK(DRIVER) do {                      \
  SET_BLOCKED_SIGSET(DRIVER);                                 \
  pthread_mutex_lock(&(DRIVER)->ctxmutex);                    \
} while (0)

/* Drop DRIVER->ctxmutex, then restore the saved signal mask. */
#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do {                    \
  pthread_mutex_unlock(&(DRIVER)->ctxmutex);                  \
  RESTORE_BLOCKED_SIGSET(DRIVER);                             \
} while (0)

/* Destroy the buffer manager (if any) and free the driver structure.
 * Accepts NULL. */
static void
intel_driver_delete(intel_driver_t *driver)
{
  if (driver == NULL)
    return;
  if (driver->bufmgr)
    drm_intel_bufmgr_destroy(driver->bufmgr);
  cl_free(driver);
}

/* Allocate a driver structure with fd = -1 (the value intel_driver_is_active
 * treats as "not opened").  Allocation failure is handled by
 * TRY_ALLOC_NO_ERR, presumably via the `error` label -- confirm against the
 * macro definition. */
static intel_driver_t*
intel_driver_new(void)
{
  intel_driver_t *driver = NULL;

  TRY_ALLOC_NO_ERR (driver, CALLOC(intel_driver_t));
  driver->fd = -1;

exit:
  return driver;
error:
  intel_driver_delete(driver);
  driver = NULL;
  goto exit;
}

/* just used for maximum relocation number in drm_intel */
#define BATCH_SIZE 0x1000

/* Create the GEM buffer manager on driver->fd and enable buffer reuse. */
static void
intel_driver_memman_init(intel_driver_t *driver)
{
  driver->bufmgr = drm_intel_bufmgr_gem_init(driver->fd, BATCH_SIZE);
  assert(driver->bufmgr);
  drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr);
}

/* Bind the driver to an open DRM fd: reset the lock state, query the chipset
 * id, create the buffer manager and derive gen_ver. */
static void
intel_driver_init(intel_driver_t *driver, int dev_fd)
{
  driver->fd
= dev_fd; driver->locked = 0; pthread_mutex_init(&driver->ctxmutex, NULL); #ifndef NDEBUG int res = #endif /* NDEBUG */ intel_driver_get_param(driver, I915_PARAM_CHIPSET_ID, &driver->device_id); assert(res); intel_driver_memman_init(driver); #if EMULATE_GEN driver->gen_ver = EMULATE_GEN; if (EMULATE_GEN == 75) driver->device_id = PCI_CHIP_HASWELL_L; /* we pick L for HSW */ else if (EMULATE_GEN == 7) driver->device_id = PCI_CHIP_IVYBRIDGE_GT2; /* we pick GT2 for IVB */ else if (EMULATE_GEN == 6) driver->device_id = PCI_CHIP_SANDYBRIDGE_GT2; /* we pick GT2 for SNB */ else FATAL ("Unsupported Gen for emulation"); #else if (IS_GEN75(driver->device_id)) driver->gen_ver = 75; else if (IS_GEN7(driver->device_id)) driver->gen_ver = 7; else if (IS_GEN6(driver->device_id)) driver->gen_ver = 6; else if(IS_IGDNG(driver->device_id)) driver->gen_ver = 5; else driver->gen_ver = 4; #endif /* EMULATE_GEN */ } static void intel_driver_open(intel_driver_t *intel, cl_context_prop props) { int cardi; char *driver_name; if (props != NULL && props->gl_type != CL_GL_NOSHARE && props->gl_type != CL_GL_GLX_DISPLAY && props->gl_type != CL_GL_EGL_DISPLAY) { printf("Unsupported gl share type %d.\n", props->gl_type); exit(-1); } intel->x11_display = XOpenDisplay(NULL); if(intel->x11_display) { if((intel->dri_ctx = getDRI2State(intel->x11_display, DefaultScreen(intel->x11_display), &driver_name))) { intel_driver_init_shared(intel, intel->dri_ctx); Xfree(driver_name); } else printf("X server found. dri2 connection failed! 
\n"); } else { printf("Can't find X server!\n"); } if(!intel_driver_is_active(intel)) { printf("Trying to open directly..."); char card_name[20]; for(cardi = 0; cardi < 16; cardi++) { sprintf(card_name, "/dev/dri/card%d", cardi); if(intel_driver_init_master(intel, card_name)) { printf("Success at %s.\n", card_name); break; } } } if(!intel_driver_is_active(intel)) { printf("Device open failed\n"); exit(-1); } #ifdef HAS_EGL if (props && props->gl_type == CL_GL_EGL_DISPLAY) { assert(props->egl_display); } #endif } static void intel_driver_close(intel_driver_t *intel) { if(intel->dri_ctx) dri_state_release(intel->dri_ctx); if(intel->x11_display) XCloseDisplay(intel->x11_display); if(intel->fd) close(intel->fd); intel->dri_ctx = NULL; intel->x11_display = NULL; intel->fd = 0; } LOCAL int intel_driver_get_param(intel_driver_t *driver, int param, int *value) { int ret; struct drm_i915_getparam gp; memset(&gp, 0, sizeof(struct drm_i915_getparam)); gp.param = param; gp.value = value; ret = drmCommandWriteRead(driver->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); return ret == 0; } LOCAL int intel_driver_is_active(intel_driver_t *driver) { return driver->fd >= 0; } LOCAL int intel_driver_init_shared(intel_driver_t *driver, dri_state_t *state) { assert(state); if(state->driConnectedFlag != DRI2) return 0; intel_driver_init(driver, state->fd); driver->master = 0; return 1; } LOCAL int intel_driver_init_master(intel_driver_t *driver, const char* dev_name) { int dev_fd; drm_client_t client; // usually dev_name = "/dev/dri/card%d" dev_fd = open(dev_name, O_RDWR); if (dev_fd == -1) return 0; // Check that we're authenticated and the only opener memset(&client, 0, sizeof(drm_client_t)); int ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client); assert (ret == 0); if (!client.auth) { close(dev_fd); return 0; } client.idx = 1; ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client); if (ret != -1 || errno != EINVAL) { close(dev_fd); return 0; } intel_driver_init(driver, dev_fd); driver->master = 
1;
  return 1;
}

/* Destroy the context mutex and, in master mode, close the device
 * descriptor.  Leaves fd at -1.  Always returns 1. */
LOCAL int
intel_driver_terminate(intel_driver_t *driver)
{
  pthread_mutex_destroy(&driver->ctxmutex);

  if(driver->master)
    close(driver->fd);
  driver->fd = -1;
  return 1;
}

/* Take the context mutex (with signals blocked) and mark the driver locked. */
LOCAL void
intel_driver_lock_hardware(intel_driver_t *driver)
{

  PPTHREAD_MUTEX_LOCK(driver);
  assert(!driver->locked);
  driver->locked = 1;
}

/* Clear the locked flag and release the context mutex. */
LOCAL void
intel_driver_unlock_hardware(intel_driver_t *driver)
{
  driver->locked = 0;
  PPTHREAD_MUTEX_UNLOCK(driver);
}

/* Open the buffer object with global name `name`; `sname` is passed through
 * as its label.  Asserts that the driver is not in master mode.  Callers in
 * this file check the result against NULL. */
LOCAL dri_bo*
intel_driver_share_buffer(intel_driver_t *driver, const char *sname, uint32_t name)
{
  assert(!driver->master);
  dri_bo *bo = intel_bo_gem_create_from_name(driver->bufmgr,
                                             sname,
                                             name);
  return bo;
}

/* Return the global (flink) name of `bo`.
 * NOTE(review): the result of dri_bo_flink() is not checked, so `name` is
 * returned uninitialised if it fails. */
LOCAL uint32_t
intel_driver_shared_name(intel_driver_t *driver, dri_bo *bo)
{
  uint32_t name;
  assert(!driver->master);
  assert(bo);
  dri_bo_flink(bo, &name);
  return name;
}
/* XXX a null props is ok? */
/* Open a temporary driver just to read the chipset id, then tear it down. */
static int
intel_get_device_id(void)
{
  intel_driver_t *driver = NULL;
  int intel_device_id;

  driver = intel_driver_new();
  assert(driver != NULL);
  intel_driver_open(driver, NULL);
  intel_device_id = driver->device_id;
  intel_driver_close(driver);
  intel_driver_terminate(driver);
  intel_driver_delete(driver);

  return intel_device_id;
}

/* Close, terminate and free a driver.  Accepts NULL. */
static void
cl_intel_driver_delete(intel_driver_t *driver)
{
  if (driver == NULL)
    return;
  intel_driver_close(driver);
  intel_driver_terminate(driver);
  intel_driver_delete(driver);
}

#include "program.h"
/* Allocate a driver, open the device according to `props` and tell gbe the
 * base index for image slots. */
static intel_driver_t*
cl_intel_driver_new(cl_context_prop props)
{
  intel_driver_t *driver = NULL;
  TRY_ALLOC_NO_ERR (driver, intel_driver_new());
  intel_driver_open(driver, props);
  /* We use the first 2 slots(0,1) for all the bufs.
* Notify the gbe this base index, thus gbe can avoid conflicts * when it allocates slots for images*/ gbe_set_image_base_index(3); exit: return driver; error: cl_intel_driver_delete(driver); driver = NULL; goto exit; } static drm_intel_bufmgr* intel_driver_get_bufmgr(intel_driver_t *drv) { return drv->bufmgr; } static uint32_t intel_driver_get_ver(struct intel_driver *drv) { return drv->gen_ver; } static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; } static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; } static int get_cl_tiling(uint32_t drm_tiling) { switch(drm_tiling) { case I915_TILING_X: return CL_TILE_X; case I915_TILING_Y: return CL_TILE_Y; case I915_TILING_NONE: return CL_NO_TILE; default: assert(0); } return CL_NO_TILE; } #if defined(HAS_EGL) #include "intel_dri_resource_sharing.h" #include "cl_image.h" static int cl_get_clformat_from_texture(GLint tex_format, cl_image_format * cl_format) { cl_int ret = CL_SUCCESS; switch (tex_format) { case GL_RGBA8: case GL_RGBA: case GL_RGBA16: case GL_RGBA8I: case GL_RGBA16I: case GL_RGBA32I: case GL_RGBA8UI: case GL_RGBA16UI: case GL_RGBA32UI: case GL_RGBA16F: case GL_RGBA32F: cl_format->image_channel_order = CL_RGBA; break; case GL_BGRA: cl_format->image_channel_order = CL_BGRA; break; default: ret = -1; goto error; } switch (tex_format) { case GL_RGBA8: case GL_RGBA: case GL_BGRA: cl_format->image_channel_data_type = CL_UNORM_INT8; break; case GL_RGBA16: cl_format->image_channel_data_type = CL_UNORM_INT16; break; case GL_RGBA8I: cl_format->image_channel_data_type = CL_SIGNED_INT8; break; case GL_RGBA16I: cl_format->image_channel_data_type = CL_SIGNED_INT16; break; case GL_RGBA32I: cl_format->image_channel_data_type = CL_SIGNED_INT32; break; case GL_RGBA8UI: cl_format->image_channel_data_type = CL_UNSIGNED_INT8; break; case GL_RGBA16UI: cl_format->image_channel_data_type = CL_UNSIGNED_INT16; break; case GL_RGBA32UI: cl_format->image_channel_data_type = CL_UNSIGNED_INT32; 
break; case GL_RGBA16F: cl_format->image_channel_data_type = CL_HALF_FLOAT; break; case GL_RGBA32F: cl_format->image_channel_order = CL_FLOAT; break; default: ret = -1; goto error; } error: return ret; } static int get_mem_type_from_target(GLenum texture_target, cl_mem_object_type *type) { switch(texture_target) { case GL_TEXTURE_1D: *type = CL_MEM_OBJECT_IMAGE1D; break; case GL_TEXTURE_2D: *type = CL_MEM_OBJECT_IMAGE2D; break; case GL_TEXTURE_3D: *type = CL_MEM_OBJECT_IMAGE3D; break; case GL_TEXTURE_1D_ARRAY: *type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break; case GL_TEXTURE_2D_ARRAY: *type = CL_MEM_OBJECT_IMAGE2D_ARRAY; break; default: return -1; } return CL_SUCCESS; } static cl_buffer intel_alloc_buffer_from_texture_egl(cl_context ctx, unsigned int target, int miplevel, unsigned int texture, struct _cl_mem_image *image) { cl_buffer bo = (cl_buffer) NULL; struct _intel_dri_share_image_region region; unsigned int bpp, intel_fmt; cl_image_format cl_format; EGLBoolean ret; EGLint attrib_list[] = { EGL_GL_TEXTURE_ID_MESA, texture, EGL_GL_TEXTURE_LEVEL_MESA, miplevel, EGL_GL_TEXTURE_TARGET_MESA, target, EGL_NONE}; ret = eglAcquireResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0], ®ion); if (!ret) goto out; bo = (cl_buffer)intel_driver_share_buffer((intel_driver_t *)ctx->drv, "rendering buffer", region.name); if (bo == NULL) { eglReleaseResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0]); goto out; } region.tiling = get_cl_tiling(region.tiling); if (cl_get_clformat_from_texture(region.gl_format, &cl_format) != 0) goto error; intel_fmt = cl_image_get_intel_format(&cl_format); if (intel_fmt == INTEL_UNSUPPORTED_FORMAT) goto error; cl_image_byte_per_pixel(&cl_format, &bpp); cl_mem_object_type image_type; if (get_mem_type_from_target(target, &image_type) != 0) goto error; cl_mem_image_init(image, region.w, region.h, image_type, region.depth, cl_format, intel_fmt, bpp, region.row_pitch, region.slice_pitch, region.tiling, 
region.tile_x, region.tile_y, region.offset); out: return bo; error: cl_buffer_unreference(bo); eglReleaseResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0]); return NULL; } static cl_buffer intel_alloc_buffer_from_texture(cl_context ctx, unsigned int target, int miplevel, unsigned int texture, struct _cl_mem_image *image) { if (IS_EGL_CONTEXT(ctx)) return intel_alloc_buffer_from_texture_egl(ctx, target, miplevel, texture, image); return NULL; } static int intel_release_buffer_from_texture(cl_context ctx, unsigned int target, int miplevel, unsigned int texture) { if (IS_EGL_CONTEXT(ctx)) { EGLint attrib_list[] = { EGL_GL_TEXTURE_ID_MESA, texture, EGL_GL_TEXTURE_LEVEL_MESA, miplevel, EGL_GL_TEXTURE_TARGET_MESA, target, EGL_NONE}; eglReleaseResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0]); return CL_SUCCESS; } return -1; } #endif cl_buffer intel_share_buffer_from_libva(cl_context ctx, unsigned int bo_name, size_t *sz) { drm_intel_bo *intel_bo; intel_bo = intel_driver_share_buffer((intel_driver_t *)ctx->drv, "shared from libva", bo_name); if (sz) *sz = intel_bo->size; return (cl_buffer)intel_bo; } cl_buffer intel_share_image_from_libva(cl_context ctx, unsigned int bo_name, struct _cl_mem_image *image) { drm_intel_bo *intel_bo; uint32_t intel_tiling, intel_swizzle_mode; intel_bo = intel_driver_share_buffer((intel_driver_t *)ctx->drv, "shared from libva", bo_name); drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode); image->tiling = get_cl_tiling(intel_tiling); return (cl_buffer)intel_bo; } static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling) { switch (tiling) { case CL_NO_TILE: *intel_tiling = I915_TILING_NONE; break; case CL_TILE_X: *intel_tiling = I915_TILING_X; break; case CL_TILE_Y: *intel_tiling = I915_TILING_Y; break; default: assert(0); return -1; } return 0; } static int intel_buffer_set_tiling(cl_buffer bo, cl_image_tiling_t tiling, size_t stride) { uint32_t 
intel_tiling, required_tiling;
  int ret;

  if (UNLIKELY((get_intel_tiling(tiling, &intel_tiling)) < 0))
    return -1;
  /* drm_intel_bo_set_tiling takes intel_tiling by pointer; keep the
   * requested mode to compare against afterwards */
  required_tiling = intel_tiling;
  ret = drm_intel_bo_set_tiling((drm_intel_bo*)bo, &intel_tiling, stride);
  assert(intel_tiling == required_tiling);
  return ret;
}

/* Install the Intel implementations into the cl_driver_* / cl_buffer_*
 * callback pointers, then let the GL-sharing (EGL builds only) and gpgpu
 * layers register theirs. */
LOCAL void
intel_setup_callbacks(void)
{
  /* driver life cycle and queries */
  cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
  cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
  cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
  cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
  cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
  /* buffer allocation and sharing */
  cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
  cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
#if defined(HAS_EGL)
  cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;
  cl_buffer_release_from_texture = (cl_buffer_release_from_texture_cb *) intel_release_buffer_from_texture;
  intel_set_cl_gl_callbacks();
#endif
  cl_buffer_get_buffer_from_libva = (cl_buffer_get_buffer_from_libva_cb *) intel_share_buffer_from_libva;
  cl_buffer_get_image_from_libva = (cl_buffer_get_image_from_libva_cb *) intel_share_image_from_libva;
  /* reference counting, mapping and misc buffer operations */
  cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference;
  cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference;
  cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map;
  cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
  cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt;
  cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt;
  cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized;
  cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
  cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
  cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
  cl_buffer_unpin = (cl_buffer_unpin_cb *) drm_intel_bo_unpin;
  cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
  cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
  intel_set_gpgpu_callbacks();
}
Release_v0.3/src/intel/intel_driver.h000066400000000000000000000104371223142177000177510ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */

/*
 * Copyright 2009 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef _INTEL_DRIVER_H_
#define _INTEL_DRIVER_H_

#include "cl_device_data.h"

#include
#include
#include
#include
#include
#include
#include

/* Command-stream encodings */
#define CMD_MI                                  (0x0 << 29)
#define CMD_2D                                  (0x2 << 29)

#define MI_NOOP                                 (CMD_MI | 0)
#define MI_BATCH_BUFFER_END                     (CMD_MI | (0xA << 23))
#define MI_FLUSH                                (CMD_MI | (0x4 << 23))
#define STATE_INSTRUCTION_CACHE_INVALIDATE      (0x1 << 0)

#define XY_COLOR_BLT_CMD                        (CMD_2D | (0x50 << 22) | 0x04)
#define XY_COLOR_BLT_WRITE_ALPHA                (1 << 21)
#define XY_COLOR_BLT_WRITE_RGB                  (1 << 20)
#define XY_COLOR_BLT_DST_TILED                  (1 << 11)

/* BR13 */
#define BR13_565                                (0x1 << 24)
#define BR13_8888                               (0x3 << 24)

struct dri_state;
typedef struct _XDisplay Display;

/* State of one opened Intel device */
typedef struct intel_driver
{
  dri_bufmgr *bufmgr;           /* buffer manager created on fd */
  int fd;                       /* device descriptor, -1 when not opened */
  int device_id;                /* chipset id queried from the kernel */
  int gen_ver;                  /* hardware generation: 4, 5, 6, 7 or 75 */
  sigset_t sa_mask;             /* signal mask saved while the lock is held */
  pthread_mutex_t ctxmutex;     /* protects hardware access */
  int locked;                   /* non-zero while the hardware lock is held */
  int master;                   /* 1: opened directly, 0: shared through DRI2 */
  Display *x11_display;         /* X connection, NULL when none */
  struct dri_state *dri_ctx;    /* DRI2 state, NULL when none */
} intel_driver_t;

/* device control */
extern void intel_driver_lock_hardware(intel_driver_t*);
extern void intel_driver_unlock_hardware(intel_driver_t*);

/* methods working in shared mode */
extern dri_bo* intel_driver_share_buffer(intel_driver_t*, const char *sname, uint32_t name);
extern uint32_t intel_driver_shared_name(intel_driver_t*, dri_bo*);

/* init driver shared with X using dri state, acquired from X Display */
extern int intel_driver_init_shared(intel_driver_t*, struct dri_state*);

/* init driver in master mode (when X is not using the card)
 * usually dev_name = "/dev/dri/card0"
 */
extern int intel_driver_init_master(intel_driver_t*, const char* dev_name);

/* terminate driver and all underlying structures */
extern int intel_driver_terminate(intel_driver_t*);

/* simple check if driver was initialized (checking fd should suffice) */
extern int intel_driver_is_active(intel_driver_t*);

/* query device parameters using driver ioctl */
extern int intel_driver_get_param(intel_driver_t*, int param, int *value);

/* init the call backs used by the ocl driver */
extern void intel_setup_callbacks(void);

#endif /* _INTEL_DRIVER_H_ */

Release_v0.3/src/intel/intel_gpgpu.c000066400000000000000000001103351223142177000175710ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
* * Author: Benjamin Segovia * Alexei Soupikov */ #include #include #include #include #include #include #include #include #include #include #include "intel/intel_gpgpu.h" #include "intel/intel_defines.h" #include "intel/intel_structs.h" #include "intel/intel_batchbuffer.h" #include "intel/intel_driver.h" #include "cl_alloc.h" #include "cl_utils.h" #include "cl_sampler.h" #ifndef CL_VERSION_1_2 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 #endif #define GEN_CMD_MEDIA_OBJECT (0x71000000) #define MO_TS_BIT (1 << 24) #define MO_RETAIN_BIT (1 << 28) #define SAMPLER_STATE_SIZE (16) /* Stores both binding tables and surface states */ typedef struct surface_heap { uint32_t binding_table[256]; char surface[256][sizeof(gen6_surface_state_t)]; } surface_heap_t; typedef struct intel_event { intel_batchbuffer_t *batch; drm_intel_bo* buffer; drm_intel_bo* ts_buf; int status; } intel_event_t; #define MAX_IF_DESC 32 /* We can bind only a limited number of buffers */ enum { max_buf_n = 128 }; enum { max_img_n = 32 }; enum {max_sampler_n = 16 }; /* Handle GPGPU state */ struct intel_gpgpu { intel_driver_t *drv; intel_batchbuffer_t *batch; cl_gpgpu_kernel *ker; drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */ uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer */ uint32_t binded_n; /* number of buffers binded */ unsigned long img_bitmap; /* image usage bitmap. */ unsigned int img_index_base; /* base index for image surface.*/ drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */ unsigned long sampler_bitmap; /* sampler usage bitmap. 
*/ struct { drm_intel_bo *bo; } stack_b; struct { drm_intel_bo *bo; } idrt_b; struct { drm_intel_bo *bo; } surface_heap_b; struct { drm_intel_bo *bo; } vfe_state_b; struct { drm_intel_bo *bo; } curbe_b; struct { drm_intel_bo *bo; } sampler_state_b; struct { drm_intel_bo *bo; } sampler_border_color_state_b; struct { drm_intel_bo *bo; } perf_b; struct { drm_intel_bo *bo; } scratch_b; struct { drm_intel_bo *bo; } constant_b; struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */ uint32_t per_thread_scratch; struct { uint32_t num_cs_entries; uint32_t size_cs_entry; /* size of one entry in 512bit elements */ } urb; uint32_t max_threads; /* max threads requested by the user */ }; typedef struct intel_gpgpu intel_gpgpu_t; static void intel_gpgpu_sync(intel_gpgpu_t *gpgpu) { if (gpgpu->batch->last_bo) drm_intel_bo_wait_rendering(gpgpu->batch->last_bo); } static void intel_gpgpu_delete(intel_gpgpu_t *gpgpu) { if (gpgpu == NULL) return; if(gpgpu->time_stamp_b.bo) drm_intel_bo_unreference(gpgpu->time_stamp_b.bo); if (gpgpu->surface_heap_b.bo) drm_intel_bo_unreference(gpgpu->surface_heap_b.bo); if (gpgpu->idrt_b.bo) drm_intel_bo_unreference(gpgpu->idrt_b.bo); if (gpgpu->vfe_state_b.bo) drm_intel_bo_unreference(gpgpu->vfe_state_b.bo); if (gpgpu->curbe_b.bo) drm_intel_bo_unreference(gpgpu->curbe_b.bo); if (gpgpu->sampler_state_b.bo) drm_intel_bo_unreference(gpgpu->sampler_state_b.bo); if (gpgpu->sampler_border_color_state_b.bo) drm_intel_bo_unreference(gpgpu->sampler_border_color_state_b.bo); if (gpgpu->perf_b.bo) drm_intel_bo_unreference(gpgpu->perf_b.bo); if (gpgpu->stack_b.bo) drm_intel_bo_unreference(gpgpu->stack_b.bo); if (gpgpu->scratch_b.bo) drm_intel_bo_unreference(gpgpu->scratch_b.bo); if(gpgpu->constant_b.bo) drm_intel_bo_unreference(gpgpu->constant_b.bo); intel_batchbuffer_delete(gpgpu->batch); cl_free(gpgpu); } static intel_gpgpu_t* intel_gpgpu_new(intel_driver_t *drv) { intel_gpgpu_t *state = NULL; TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t)); 
state->drv = drv; state->batch = intel_batchbuffer_new(state->drv); assert(state->batch); exit: return state; error: intel_gpgpu_delete(state); state = NULL; goto exit; } static void intel_gpgpu_select_pipeline(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 1); OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_set_base_address(intel_gpgpu_t *gpgpu) { const uint32_t def_cc = cc_llc_l3; /* default Cache Control value */ BEGIN_BATCH(gpgpu->batch, 10); OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 8); /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY); /* General State Base Addr */ /* 0, State Mem Obj CC */ /* We use a state base address for the surface heap since IVB clamp the * binding table pointer at 11 bits. So, we cannot use pointers directly while * using the surface heap */ OUT_RELOC(gpgpu->batch, gpgpu->surface_heap_b.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Dynamic State Base Addr */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */ /* If we output an AUB file, we limit the total size to 64MB */ #if USE_FULSIM OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */ OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */ OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* Indirect Obj Access Upper Bound */ OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */ #else OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY); /* According to mesa 
i965 driver code, we must set the dynamic state access upper bound * to a valid bound value, otherwise, the border color pointer may be rejected and you * may get incorrect border color. This is a known hardware bug. */ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY); #endif /* USE_FULSIM */ ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 8); OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (8-2)); if(gpgpu->per_thread_scratch > 0) { OUT_RELOC(gpgpu->batch, gpgpu->scratch_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, gpgpu->per_thread_scratch/1024 - 1); } else { OUT_BATCH(gpgpu->batch, 0); } /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */ OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (64 << 8) | 0xc4); OUT_BATCH(gpgpu->batch, 0); /* curbe_size */ OUT_BATCH(gpgpu->batch, 480); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_curbe_buffer(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 4); OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */ OUT_BATCH(gpgpu->batch, 0); /* mbz */ // XXX #if 1 OUT_BATCH(gpgpu->batch, gpgpu->urb.size_cs_entry* gpgpu->urb.num_cs_entries*32); #else OUT_BATCH(gpgpu->batch, 5120); #endif OUT_RELOC(gpgpu->batch, gpgpu->curbe_b.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_idrt(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 4); OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */ OUT_BATCH(gpgpu->batch, 0); /* mbz */ OUT_BATCH(gpgpu->batch, 1 << 5); OUT_RELOC(gpgpu->batch, gpgpu->idrt_b.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); ADVANCE_BATCH(gpgpu->batch); } static const uint32_t gpgpu_l3_config_reg1[] = { 0x00080040, 0x02040040, 0x00800040, 
0x01000038, 0x02000030, 0x01000038, 0x00000038, 0x00000040, 0x0A140091, 0x09100091, 0x08900091, 0x08900091 }; static const uint32_t gpgpu_l3_config_reg2[] = { 0x00000000, 0x00000000, 0x00080410, 0x00080410, 0x00040410, 0x00040420, 0x00080420, 0x00080020, 0x00204080, 0x00244890, 0x00284490, 0x002444A0 }; /* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer. */ static void intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx) { BEGIN_BATCH(gpgpu->batch, 5); OUT_BATCH(gpgpu->batch, CMD_PIPE_CONTROL | (5-2)); OUT_BATCH(gpgpu->batch, GEN7_PIPE_CONTROL_WRITE_TIMESTAMP); OUT_RELOC(gpgpu->batch, gpgpu->time_stamp_b.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE | idx * sizeof(uint64_t)); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(); } static void intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, SIZEOF32(gen6_pipe_control_t)); gen6_pipe_control_t* pc = (gen6_pipe_control_t*) intel_batchbuffer_alloc_space(gpgpu->batch, 0); memset(pc, 0, sizeof(*pc)); pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; pc->dw1.render_target_cache_flush_enable = 1; pc->dw1.texture_cache_invalidation_enable = 1; pc->dw1.cs_stall = 1; pc->dw1.dc_flush_enable = 1; ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_set_L3(intel_gpgpu_t *gpgpu, uint32_t use_slm) { BEGIN_BATCH(gpgpu->batch, 6); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET); if (use_slm) OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[8]); else OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ 
OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET); if (use_slm) OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[8]); else OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]); ADVANCE_BATCH(gpgpu->batch); intel_gpgpu_pipe_control(gpgpu); } static void intel_gpgpu_batch_start(intel_gpgpu_t *gpgpu) { intel_batchbuffer_start_atomic(gpgpu->batch, 256); intel_gpgpu_pipe_control(gpgpu); intel_gpgpu_set_L3(gpgpu, gpgpu->ker->use_slm); intel_gpgpu_select_pipeline(gpgpu); intel_gpgpu_set_base_address(gpgpu); intel_gpgpu_load_vfe_state(gpgpu); intel_gpgpu_load_curbe_buffer(gpgpu); intel_gpgpu_load_idrt(gpgpu); if (gpgpu->perf_b.bo) { BEGIN_BATCH(gpgpu->batch, 3); OUT_BATCH(gpgpu->batch, (0x28 << 23) | /* MI_REPORT_PERF_COUNT */ (3 - 2)); /* length-2 */ OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0 | /* Offset for the start "counters" */ 1); /* Use GTT and not PGTT */ OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(gpgpu->batch); } /* Insert PIPE_CONTROL for time stamp of start*/ if (gpgpu->time_stamp_b.bo) intel_gpgpu_write_timestamp(gpgpu, 0); } static void intel_gpgpu_batch_end(intel_gpgpu_t *gpgpu, int32_t flush_mode) { /* Insert PIPE_CONTROL for time stamp of end*/ if (gpgpu->time_stamp_b.bo) intel_gpgpu_write_timestamp(gpgpu, 1); /* Insert the performance counter command */ if (gpgpu->perf_b.bo) { BEGIN_BATCH(gpgpu->batch, 3); OUT_BATCH(gpgpu->batch, (0x28 << 23) | /* MI_REPORT_PERF_COUNT */ (3 - 2)); /* length-2 */ OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 512 | /* Offset for the end "counters" */ 1); /* Use GTT and not PGTT */ OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(gpgpu->batch); } if(flush_mode) intel_gpgpu_pipe_control(gpgpu); intel_batchbuffer_end_atomic(gpgpu->batch); } static void intel_gpgpu_batch_reset(intel_gpgpu_t *gpgpu, size_t sz) { intel_batchbuffer_reset(gpgpu->batch, sz); } /* check we do not get a 0 starting address for binded buf */ static void 
intel_gpgpu_check_binded_buf_address(intel_gpgpu_t *gpgpu) { uint32_t i; for (i = 0; i < gpgpu->binded_n; ++i) assert(gpgpu->binded_buf[i]->offset != 0); } static void intel_gpgpu_flush(intel_gpgpu_t *gpgpu) { intel_batchbuffer_emit_mi_flush(gpgpu->batch); intel_batchbuffer_flush(gpgpu->batch); intel_gpgpu_check_binded_buf_address(gpgpu); } static void intel_gpgpu_state_init(intel_gpgpu_t *gpgpu, uint32_t max_threads, uint32_t size_cs_entry, int profiling) { drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; drm_intel_bo *bo; /* Binded buffers */ gpgpu->binded_n = 0; gpgpu->img_bitmap = 0; gpgpu->img_index_base = 3; gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1); /* URB */ gpgpu->urb.num_cs_entries = 64; gpgpu->urb.size_cs_entry = size_cs_entry; gpgpu->max_threads = max_threads; /* Set the profile buffer*/ if(gpgpu->time_stamp_b.bo) dri_bo_unreference(gpgpu->time_stamp_b.bo); gpgpu->time_stamp_b.bo = NULL; if (profiling) { bo = dri_bo_alloc(gpgpu->drv->bufmgr, "timestamp query", 4096, 4096); assert(bo); gpgpu->time_stamp_b.bo = bo; } /* Constant URB buffer */ if(gpgpu->curbe_b.bo) dri_bo_unreference(gpgpu->curbe_b.bo); uint32_t size_cb = gpgpu->urb.num_cs_entries * gpgpu->urb.size_cs_entry * 64; size_cb = ALIGN(size_cb, 4096); bo = dri_bo_alloc(gpgpu->drv->bufmgr, "CURBE_BUFFER", size_cb, 64); assert(bo); gpgpu->curbe_b.bo = bo; /* surface state */ if(gpgpu->surface_heap_b.bo) dri_bo_unreference(gpgpu->surface_heap_b.bo); bo = dri_bo_alloc(bufmgr, "SURFACE_HEAP", sizeof(surface_heap_t), 32); assert(bo); dri_bo_map(bo, 1); memset(bo->virtual, 0, sizeof(surface_heap_t)); gpgpu->surface_heap_b.bo = bo; /* Interface descriptor remap table */ if(gpgpu->idrt_b.bo) dri_bo_unreference(gpgpu->idrt_b.bo); bo = dri_bo_alloc(bufmgr, "IDRT", MAX_IF_DESC * sizeof(struct gen6_interface_descriptor), 32); assert(bo); gpgpu->idrt_b.bo = bo; /* vfe state */ if(gpgpu->vfe_state_b.bo) dri_bo_unreference(gpgpu->vfe_state_b.bo); gpgpu->vfe_state_b.bo = NULL; /* sampler state */ if 
(gpgpu->sampler_state_b.bo) dri_bo_unreference(gpgpu->sampler_state_b.bo); bo = dri_bo_alloc(gpgpu->drv->bufmgr, "SAMPLER_STATE", GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t), 32); assert(bo); dri_bo_map(bo, 1); memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * GEN_MAX_SAMPLERS); gpgpu->sampler_state_b.bo = bo; /* sampler border color state */ if (gpgpu->sampler_border_color_state_b.bo) dri_bo_unreference(gpgpu->sampler_border_color_state_b.bo); bo = dri_bo_alloc(gpgpu->drv->bufmgr, "SAMPLER_BORDER_COLOR_STATE", sizeof(gen7_sampler_border_color_t), 32); assert(bo); dri_bo_map(bo, 1); memset(bo->virtual, 0, sizeof(gen7_sampler_border_color_t)); gpgpu->sampler_border_color_state_b.bo = bo; /* stack */ if (gpgpu->stack_b.bo) dri_bo_unreference(gpgpu->stack_b.bo); gpgpu->stack_b.bo = NULL; } static void intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset) { surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual; heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t); dri_bo_emit_reloc(gpgpu->surface_heap_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, obj_bo_offset, heap->binding_table[index] + offsetof(gen7_surface_state_t, ss1), obj_bo); } static dri_bo* intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size) { uint32_t s = size - 1; assert(size != 0); surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual; gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2]; memset(ss2, 0, sizeof(gen7_surface_state_t)); ss2->ss0.surface_type = I965_SURFACE_BUFFER; ss2->ss0.surface_format = I965_SURFACEFORMAT_RAW; ss2->ss2.width = s & 0x7f; /* bits 6:0 of sz */ ss2->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */ ss2->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */ ss2->ss5.cache_control = cc_llc_l3; heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t); if(gpgpu->constant_b.bo) 
dri_bo_unreference(gpgpu->constant_b.bo); gpgpu->constant_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "CONSTANT_BUFFER", s, 64); assert(gpgpu->constant_b.bo); ss2->ss1.base_addr = gpgpu->constant_b.bo->offset; dri_bo_emit_reloc(gpgpu->surface_heap_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0, heap->binding_table[2] + offsetof(gen7_surface_state_t, ss1), gpgpu->constant_b.bo); return gpgpu->constant_b.bo; } /* Map address space with two 2GB surfaces. One surface for untyped message and * one surface for byte scatters / gathers. Actually the HW does not require two * surfaces but Fulsim complains */ static void intel_gpgpu_map_address_space(intel_gpgpu_t *gpgpu) { surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual; gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[0]; gen7_surface_state_t *ss1 = (gen7_surface_state_t *) heap->surface[1]; memset(ss0, 0, sizeof(gen7_surface_state_t)); memset(ss1, 0, sizeof(gen7_surface_state_t)); ss1->ss0.surface_type = ss0->ss0.surface_type = I965_SURFACE_BUFFER; ss1->ss0.surface_format = ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW; ss1->ss2.width = ss0->ss2.width = 127; /* bits 6:0 of sz */ ss1->ss2.height = ss0->ss2.height = 16383; /* bits 20:7 of sz */ ss0->ss3.depth = 1023; /* bits 30:21 of sz */ ss1->ss3.depth = 1023; /* bits 30:21 of sz */ ss1->ss5.cache_control = ss0->ss5.cache_control = cc_llc_l3; heap->binding_table[0] = offsetof(surface_heap_t, surface); heap->binding_table[1] = sizeof(gen7_surface_state_t) + offsetof(surface_heap_t, surface); } static int intel_get_surface_type(cl_mem_object_type type) { switch (type) { case CL_MEM_OBJECT_IMAGE1D: return I965_SURFACE_1D; case CL_MEM_OBJECT_IMAGE2D: return I965_SURFACE_2D; case CL_MEM_OBJECT_IMAGE3D: return I965_SURFACE_3D; case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE1D_ARRAY: NOT_IMPLEMENTED; break; default: assert(0); } return 0; } static void intel_gpgpu_bind_image_gen7(intel_gpgpu_t 
*gpgpu, uint32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, int32_t w, int32_t h, int32_t depth, int32_t pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual; gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index]; memset(ss, 0, sizeof(*ss)); ss->ss0.surface_type = intel_get_surface_type(type); ss->ss0.surface_format = format; ss->ss1.base_addr = obj_bo->offset; ss->ss2.width = w - 1; ss->ss2.height = h - 1; ss->ss3.depth = depth - 1; ss->ss4.not_str_buf.rt_view_extent = depth - 1; ss->ss4.not_str_buf.min_array_element = 0; ss->ss3.pitch = pitch - 1; ss->ss5.cache_control = cc_llc_l3; if (tiling == GPGPU_TILE_X) { ss->ss0.tiled_surface = 1; ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; } else if (tiling == GPGPU_TILE_Y) { ss->ss0.tiled_surface = 1; ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; } ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset); gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo; } static void intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset, uint32_t cchint) { assert(gpgpu->binded_n < max_buf_n); gpgpu->binded_buf[gpgpu->binded_n] = buf; gpgpu->binded_offset[gpgpu->binded_n] = offset; gpgpu->binded_n++; } static void intel_gpgpu_set_scratch(intel_gpgpu_t * gpgpu, uint32_t per_thread_size) { drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; drm_intel_bo* old = gpgpu->scratch_b.bo; uint32_t total = per_thread_size * gpgpu->max_threads; gpgpu->per_thread_scratch = per_thread_size; if(old && old->size < total) { drm_intel_bo_unreference(old); old = NULL; } if(!old) gpgpu->scratch_b.bo = drm_intel_bo_alloc(bufmgr, "SCRATCH_BO", total, 4096); } static void intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint32_t cchint) { drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", 
size, 64); intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, cchint); } static void intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu, uint32_t index, cl_buffer *obj_bo, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, int32_t w, int32_t h, int32_t depth, int32_t pitch, cl_gpgpu_tiling tiling) { intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, obj_bo_offset, format, type, w, h, depth, pitch, tiling); assert(index < GEN_MAX_SURFACES); } static void intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gen6_interface_descriptor_t *desc; drm_intel_bo *bo = NULL, *ker_bo = NULL; bo = gpgpu->idrt_b.bo; dri_bo_map(bo, 1); assert(bo->virtual); desc = (gen6_interface_descriptor_t*) bo->virtual; memset(desc, 0, sizeof(*desc)); ker_bo = (drm_intel_bo *) kernel->bo; desc->desc0.kernel_start_pointer = ker_bo->offset >> 6; /* reloc */ desc->desc1.single_program_flow = 1; desc->desc1.floating_point_mode = 0; /* use IEEE-754 rule */ desc->desc5.rounding_mode = 0; /* round to nearest even */ desc->desc2.sampler_state_pointer = gpgpu->sampler_state_b.bo->offset >> 5; desc->desc3.binding_table_entry_count = 0; /* no prefetch */ desc->desc3.binding_table_pointer = 0; desc->desc4.curbe_read_len = kernel->curbe_sz / 32; desc->desc4.curbe_read_offset = 0; /* Barriers / SLM are automatically handled on Gen7+ */ if (gpgpu->drv->gen_ver == 7 || gpgpu->drv->gen_ver == 75) { size_t slm_sz = kernel->slm_sz; desc->desc5.group_threads_num = kernel->use_slm ? 
kernel->thread_n : 0; desc->desc5.barrier_enable = kernel->use_slm; if (slm_sz <= 4*KB) slm_sz = 4*KB; else if (slm_sz <= 8*KB) slm_sz = 8*KB; else if (slm_sz <= 16*KB) slm_sz = 16*KB; else if (slm_sz <= 32*KB) slm_sz = 32*KB; else slm_sz = 64*KB; slm_sz = slm_sz >> 12; desc->desc5.slm_sz = slm_sz; } else desc->desc5.group_threads_num = kernel->barrierID; /* BarrierID on GEN6 */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0, offsetof(gen6_interface_descriptor_t, desc0), ker_bo); dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_SAMPLER, 0, 0, offsetof(gen6_interface_descriptor_t, desc2), gpgpu->sampler_state_b.bo); dri_bo_unmap(bo); } static void intel_gpgpu_upload_curbes(intel_gpgpu_t *gpgpu, const void* data, uint32_t size) { unsigned char *curbe = NULL; cl_gpgpu_kernel *k = gpgpu->ker; uint32_t i, j; /* Upload the data first */ dri_bo_map(gpgpu->curbe_b.bo, 1); assert(gpgpu->curbe_b.bo->virtual); curbe = (unsigned char *) gpgpu->curbe_b.bo->virtual; memcpy(curbe, data, size); /* Now put all the relocations for our flat address space */ for (i = 0; i < k->thread_n; ++i) for (j = 0; j < gpgpu->binded_n; ++j) { *(uint32_t*)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset; drm_intel_bo_emit_reloc(gpgpu->curbe_b.bo, gpgpu->binded_offset[j]+i*k->curbe_sz, gpgpu->binded_buf[j], 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } dri_bo_unmap(gpgpu->curbe_b.bo); } static void intel_gpgpu_upload_samplers(intel_gpgpu_t *gpgpu, const void *data, uint32_t n) { if (n) { const size_t sz = n * sizeof(gen6_sampler_state_t); memcpy(gpgpu->sampler_state_b.bo->virtual, data, sz); } } int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest) { switch( cl_address_mode ) { case CLK_ADDRESS_NONE: case CLK_ADDRESS_REPEAT: return GEN_TEXCOORDMODE_WRAP; case CLK_ADDRESS_CLAMP: return GEN_TEXCOORDMODE_CLAMP_BORDER; case CLK_ADDRESS_CLAMP_TO_EDGE: return GEN_TEXCOORDMODE_CLAMP; case CLK_ADDRESS_MIRRORED_REPEAT: return GEN_TEXCOORDMODE_MIRROR; 
default: return GEN_TEXCOORDMODE_WRAP; } } static void intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler) { int using_nearest = 0; uint32_t wrap_mode; gen7_sampler_state_t *sampler; sampler = (gen7_sampler_state_t *)(gpgpu->sampler_state_b.bo->virtual) + index; memset(sampler, 0, sizeof(*sampler)); sampler->ss2.default_color_pointer = (gpgpu->sampler_border_color_state_b.bo->offset) >> 5; if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE) sampler->ss3.non_normalized_coord = 1; else sampler->ss3.non_normalized_coord = 0; switch (clk_sampler & __CLK_FILTER_MASK) { case CLK_FILTER_NEAREST: sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST; sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST; using_nearest = 1; break; case CLK_FILTER_LINEAR: sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR; sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR; break; } wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest); sampler->ss3.s_wrap_mode = wrap_mode; /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. 
*/ sampler->ss3.t_wrap_mode = wrap_mode; sampler->ss3.r_wrap_mode = wrap_mode; sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ sampler->ss0.base_level = 0; sampler->ss1.max_lod = 0; sampler->ss1.min_lod = 0; if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST) sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN | GEN_ADDRESS_ROUNDING_ENABLE_V_MIN | GEN_ADDRESS_ROUNDING_ENABLE_R_MIN; if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST) sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG | GEN_ADDRESS_ROUNDING_ENABLE_V_MAG | GEN_ADDRESS_ROUNDING_ENABLE_R_MAG; dri_bo_emit_reloc(gpgpu->sampler_state_b.bo, I915_GEM_DOMAIN_SAMPLER, 0, 0, index * sizeof(gen7_sampler_state_t) + offsetof(gen7_sampler_state_t, ss2), gpgpu->sampler_border_color_state_b.bo); } static void intel_gpgpu_bind_sampler(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz) { int index; #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND assert(sampler_sz <= GEN_MAX_SAMPLERS/2); #else assert(sampler_sz <= GEN_MAX_SAMPLERS); #endif for(index = 0; index < sampler_sz; index++) { intel_gpgpu_insert_sampler(gpgpu, index, samplers[index]); #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND /* Duplicate the sampler to 8 + index and fixup the address mode * to repeat.*/ if ((samplers[index] & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) { intel_gpgpu_insert_sampler(gpgpu, index + 8, (samplers[index] & ~__CLK_ADDRESS_MASK) | CLK_ADDRESS_CLAMP_TO_EDGE); } #endif } } static void intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gpgpu->ker = kernel; intel_gpgpu_build_idrt(gpgpu, kernel); intel_gpgpu_map_address_space(gpgpu); dri_bo_unmap(gpgpu->surface_heap_b.bo); dri_bo_unmap(gpgpu->sampler_state_b.bo); dri_bo_unmap(gpgpu->sampler_border_color_state_b.bo); } static void intel_gpgpu_set_perf_counters(intel_gpgpu_t *gpgpu, cl_buffer *perf) { if (gpgpu->perf_b.bo) drm_intel_bo_unreference(gpgpu->perf_b.bo); 
drm_intel_bo_reference((drm_intel_bo*) perf); gpgpu->perf_b.bo = (drm_intel_bo*) perf; } static void intel_gpgpu_walker(intel_gpgpu_t *gpgpu, uint32_t simd_sz, uint32_t thread_n, const size_t global_wk_off[3], const size_t global_wk_sz[3], const size_t local_wk_sz[3]) { const uint32_t global_wk_dim[3] = { global_wk_sz[0] / local_wk_sz[0], global_wk_sz[1] / local_wk_sz[1], global_wk_sz[2] / local_wk_sz[2] }; uint32_t right_mask = ~0x0; size_t group_sz = local_wk_sz[0] * local_wk_sz[1] * local_wk_sz[2]; assert(simd_sz == 8 || simd_sz == 16); uint32_t shift = (group_sz & (simd_sz - 1)); shift = (shift == 0) ? simd_sz : shift; right_mask = (1 << shift) - 1; BEGIN_BATCH(gpgpu->batch, 11); OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 9); OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */ if (simd_sz == 16) OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */ else OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */ OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, global_wk_dim[0]); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, global_wk_dim[1]); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, global_wk_dim[2]); OUT_BATCH(gpgpu->batch, right_mask); OUT_BATCH(gpgpu->batch, ~0x0); /* we always set height as 1, so set bottom mask as all 1*/ ADVANCE_BATCH(gpgpu->batch); BEGIN_BATCH(gpgpu->batch, 2); OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0); OUT_BATCH(gpgpu->batch, 0); /* kernel index == 0 */ ADVANCE_BATCH(gpgpu->batch); } static intel_event_t* intel_gpgpu_event_new(intel_gpgpu_t *gpgpu) { intel_event_t *event = NULL; TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t)); event->status = command_queued; event->batch = NULL; event->buffer = gpgpu->batch->buffer; if(event->buffer != NULL) drm_intel_bo_reference(event->buffer); if(gpgpu->time_stamp_b.bo) { event->ts_buf = gpgpu->time_stamp_b.bo; drm_intel_bo_reference(event->ts_buf); } exit: return event; error: cl_free(event); event = NULL; goto exit; } static int 
intel_gpgpu_event_update_status(intel_event_t *event, int wait) { if(event->status == command_complete) return event->status; if (event->buffer && event->batch == NULL && //have flushed !drm_intel_bo_busy(event->buffer)) { event->status = command_complete; drm_intel_bo_unreference(event->buffer); event->buffer = NULL; return event->status; } if(wait == 0) return event->status; if (event->buffer) { drm_intel_bo_wait_rendering(event->buffer); event->status = command_complete; drm_intel_bo_unreference(event->buffer); event->buffer = NULL; } return event->status; } static void intel_gpgpu_event_pending(intel_gpgpu_t *gpgpu, intel_event_t *event) { assert(event->buffer); //This is gpu enqueue command assert(event->batch == NULL); //This command haven't pengding. event->batch = intel_batchbuffer_new(gpgpu->drv); assert(event->batch); *event->batch = *gpgpu->batch; if(event->batch->buffer) drm_intel_bo_reference(event->batch->buffer); } static void intel_gpgpu_event_resume(intel_event_t *event) { assert(event->batch); //This command have pending. intel_batchbuffer_flush(event->batch); intel_batchbuffer_delete(event->batch); event->batch = NULL; } static void intel_gpgpu_event_delete(intel_event_t *event) { assert(event->batch == NULL); //This command must have been flushed. 
if(event->buffer) drm_intel_bo_unreference(event->buffer); if(event->ts_buf) drm_intel_bo_unreference(event->ts_buf); cl_free(event); } static void intel_gpgpu_event_get_timestamp(intel_event_t *event, int index, uint64_t* ret_ts) { assert(event->ts_buf != NULL); assert(index == 0 || index == 1); drm_intel_gem_bo_map_gtt(event->ts_buf); uint64_t* ptr = event->ts_buf->virtual; *ret_ts = ptr[index] * 80; //convert to nanoseconds drm_intel_gem_bo_unmap_gtt(event->ts_buf); } LOCAL void intel_set_gpgpu_callbacks(void) { cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new; cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete; cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync; cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image; cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf; cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack; cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init; cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters; cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes; cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer; cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup; cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers; cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset; cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start; cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end; cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush; cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker; cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler; cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) intel_gpgpu_set_scratch; cl_gpgpu_event_new = (cl_gpgpu_event_new_cb *)intel_gpgpu_event_new; 
cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb *)intel_gpgpu_event_update_status; cl_gpgpu_event_pending = (cl_gpgpu_event_pending_cb *)intel_gpgpu_event_pending; cl_gpgpu_event_resume = (cl_gpgpu_event_resume_cb *)intel_gpgpu_event_resume; cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete; cl_gpgpu_event_get_timestamp = (cl_gpgpu_event_get_timestamp_cb *)intel_gpgpu_event_get_timestamp; } Release_v0.3/src/intel/intel_gpgpu.h000066400000000000000000000021011223142177000175650ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia * Alexei Soupikov */ #ifndef __INTEL_GPGPU_H__ #define __INTEL_GPGPU_H__ #include "cl_utils.h" #include "cl_driver.h" #include #include /* Set the gpgpu related call backs */ extern void intel_set_gpgpu_callbacks(void); #endif /* __INTEL_GPGPU_H__ */ Release_v0.3/src/intel/intel_structs.h000066400000000000000000000277651223142177000202010ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2009 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* */ #ifndef __INTEL_STRUCTS_H__ #define __INTEL_STRUCTS_H__ #include typedef struct gen6_interface_descriptor { struct { uint32_t pad6:6; uint32_t kernel_start_pointer:26; } desc0; struct { uint32_t pad:7; uint32_t software_exception:1; uint32_t pad2:3; uint32_t maskstack_exception:1; uint32_t pad3:1; uint32_t illegal_opcode_exception:1; uint32_t pad4:2; uint32_t floating_point_mode:1; uint32_t thread_priority:1; uint32_t single_program_flow:1; uint32_t pad5:1; uint32_t pad6:6; uint32_t pad7:6; } desc1; struct { uint32_t pad:2; uint32_t sampler_count:3; uint32_t sampler_state_pointer:27; } desc2; struct { uint32_t binding_table_entry_count:5; /* prefetch entries only */ uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */ } desc3; struct { uint32_t curbe_read_offset:16; /* in GRFs */ uint32_t curbe_read_len:16; /* in GRFs */ } desc4; struct { uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */ uint32_t barrier_return_byte:8; uint32_t slm_sz:5; /* 0..16 - 0K..64K */ uint32_t barrier_enable:1; uint32_t rounding_mode:2; uint32_t barrier_return_grf_offset:8; } desc5; uint32_t desc6; /* unused */ uint32_t desc7; /* unused */ } gen6_interface_descriptor_t; typedef struct gen6_surface_state { struct { uint32_t cube_pos_z:1; uint32_t cube_neg_z:1; uint32_t cube_pos_y:1; uint32_t cube_neg_y:1; uint32_t cube_pos_x:1; uint32_t cube_neg_x:1; uint32_t pad:2; uint32_t render_cache_read_mode:1; uint32_t cube_map_corner_mode:1; uint32_t mipmap_layout_mode:1; uint32_t vert_line_stride_ofs:1; uint32_t vert_line_stride:1; uint32_t color_blend:1; uint32_t writedisable_blue:1; uint32_t writedisable_green:1; uint32_t writedisable_red:1; uint32_t writedisable_alpha:1; uint32_t surface_format:9; uint32_t data_return_format:1; uint32_t pad0:1; uint32_t surface_type:3; } ss0; struct { uint32_t base_addr; } ss1; struct { uint32_t render_target_rotation:2; uint32_t mip_count:4; uint32_t width:13; uint32_t height:13; } ss2; struct { uint32_t tile_walk:1; uint32_t 
tiled_surface:1; uint32_t pad:1; uint32_t pitch:18; uint32_t depth:11; } ss3; struct { uint32_t multisample_pos_index:3; uint32_t pad:1; uint32_t multisample_count:3; uint32_t pad1:1; uint32_t rt_view_extent:9; uint32_t min_array_elt:11; uint32_t min_lod:4; } ss4; struct { uint32_t pad:16; uint32_t cache_control:2; /* different values for GT and IVB */ uint32_t gfdt:1; /* allows selective flushing of LLC (e.g. for scanout) */ uint32_t encrypted_data:1; uint32_t y_offset:4; uint32_t vertical_alignment:1; uint32_t x_offset:7; } ss5; uint32_t ss6; /* unused */ uint32_t ss7; /* unused */ } gen6_surface_state_t; typedef struct gen7_surface_state { struct { uint32_t cube_pos_z:1; uint32_t cube_neg_z:1; uint32_t cube_pos_y:1; uint32_t cube_neg_y:1; uint32_t cube_pos_x:1; uint32_t cube_neg_x:1; uint32_t media_boundary_pixel_mode:2; uint32_t render_cache_rw_mode:1; uint32_t pad1:1; uint32_t surface_array_spacing:1; uint32_t vertical_line_stride_offset:1; uint32_t vertical_line_stride:1; uint32_t tile_walk:1; uint32_t tiled_surface:1; uint32_t horizontal_alignment:1; uint32_t vertical_alignment:2; uint32_t surface_format:9; uint32_t pad0:1; uint32_t surface_array:1; uint32_t surface_type:3; } ss0; struct { uint32_t base_addr; } ss1; struct { uint32_t width:14; uint32_t pad1:2; uint32_t height:14; uint32_t pad0:2; } ss2; struct { uint32_t pitch:18; uint32_t pad0:3; uint32_t depth:11; } ss3; union { struct { uint32_t mulsample_pal_idx:3; uint32_t numer_mulsample:3; uint32_t mss_fmt:1; uint32_t rt_view_extent:11; uint32_t min_array_element:11; uint32_t rt_rotate:2; uint32_t pad0:1; } not_str_buf; } ss4; struct { uint32_t mip_count:4; uint32_t surface_min_load:4; uint32_t pad2:6; uint32_t coherence_type:1; uint32_t stateless_force_write_thru:1; uint32_t cache_control:4; uint32_t y_offset:4; uint32_t pad0:1; uint32_t x_offset:7; } ss5; uint32_t ss6; /* unused */ uint32_t ss7; /* unused */ } gen7_surface_state_t; STATIC_ASSERT(sizeof(gen6_surface_state_t) == 
sizeof(gen7_surface_state_t)); static const size_t surface_state_sz = sizeof(gen6_surface_state_t); typedef struct gen6_vfe_state_inline { struct { uint32_t per_thread_scratch_space:4; uint32_t pad3:3; uint32_t extend_vfe_state_present:1; uint32_t pad2:2; uint32_t scratch_base:22; } vfe0; struct { uint32_t debug_counter_control:2; uint32_t gpgpu_mode:1; /* 0 for SNB!!! */ uint32_t gateway_mmio_access:2; uint32_t fast_preempt:1; uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */ uint32_t reset_gateway_timer:1; uint32_t urb_entries:8; uint32_t max_threads:16; } vfe1; struct { uint32_t pad8:8; uint32_t debug_object_id:24; } vfe2; struct { uint32_t curbe_size:16; /* in GRFs */ uint32_t urb_size:16; /* in GRFs */ } vfe3; struct { uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */ } vfe4; struct { uint32_t scoreboard0_dx:4; uint32_t scoreboard0_dy:4; uint32_t scoreboard1_dx:4; uint32_t scoreboard1_dy:4; uint32_t scoreboard2_dx:4; uint32_t scoreboard2_dy:4; uint32_t scoreboard3_dx:4; uint32_t scoreboard3_dy:4; } vfe5; struct { uint32_t scoreboard4_dx:4; uint32_t scoreboard4_dy:4; uint32_t scoreboard5_dx:4; uint32_t scoreboard5_dy:4; uint32_t scoreboard6_dx:4; uint32_t scoreboard6_dy:4; uint32_t scoreboard7_dx:4; uint32_t scoreboard7_dy:4; } vfe6; } gen6_vfe_state_inline_t; typedef struct gen6_pipe_control { struct { uint32_t length : BITFIELD_RANGE(0, 7); uint32_t reserved : BITFIELD_RANGE(8, 15); uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23); uint32_t instruction_opcode : BITFIELD_RANGE(24, 26); uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28); uint32_t instruction_type : BITFIELD_RANGE(29, 31); } dw0; struct { uint32_t depth_cache_flush_enable : BITFIELD_BIT(0); uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1); uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2); uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3); uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4); uint32_t 
dc_flush_enable : BITFIELD_BIT(5); uint32_t protected_memory_app_id : BITFIELD_BIT(6); uint32_t pipe_control_flush_enable : BITFIELD_BIT(7); uint32_t notify_enable : BITFIELD_BIT(8); uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9); uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10); uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11); uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12); uint32_t depth_stall_enable : BITFIELD_BIT(13); uint32_t post_sync_operation : BITFIELD_RANGE(14, 15); uint32_t generic_media_state_clear : BITFIELD_BIT(16); uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17); uint32_t tlb_invalidate : BITFIELD_BIT(18); uint32_t global_snapshot_count_reset : BITFIELD_BIT(19); uint32_t cs_stall : BITFIELD_BIT(20); uint32_t store_data_index : BITFIELD_BIT(21); uint32_t protected_memory_enable : BITFIELD_BIT(22); uint32_t reserved : BITFIELD_RANGE(23, 31); } dw1; struct { uint32_t reserved : BITFIELD_RANGE(0, 1); uint32_t destination_address_type : BITFIELD_BIT(2); uint32_t address : BITFIELD_RANGE(3, 31); } dw2; struct { uint64_t data; } qw0; } gen6_pipe_control_t; typedef struct gen6_sampler_state { struct { uint32_t shadow_function:3; uint32_t lod_bias:11; uint32_t min_filter:3; uint32_t mag_filter:3; uint32_t mip_filter:2; uint32_t base_level:5; uint32_t min_mag_neq:1; uint32_t lod_preclamp:1; uint32_t default_color_mode:1; uint32_t pad0:1; uint32_t disable:1; } ss0; struct { uint32_t r_wrap_mode:3; uint32_t t_wrap_mode:3; uint32_t s_wrap_mode:3; uint32_t cube_control_mode:1; uint32_t pad:2; uint32_t max_lod:10; uint32_t min_lod:10; } ss1; struct { uint32_t pad:5; uint32_t default_color_pointer:27; } ss2; struct { uint32_t non_normalized_coord:1; uint32_t pad:12; uint32_t address_round:6; uint32_t max_aniso:3; uint32_t chroma_key_mode:1; uint32_t chroma_key_index:2; uint32_t chroma_key_enable:1; uint32_t monochrome_filter_width:3; uint32_t monochrome_filter_height:3; } ss3; } gen6_sampler_state_t; 
typedef struct gen7_sampler_border_color { float r,g,b,a; } gen7_sampler_border_color_t; typedef struct gen7_sampler_state { struct { uint32_t aniso_algorithm:1; uint32_t lod_bias:13; uint32_t min_filter:3; uint32_t mag_filter:3; uint32_t mip_filter:2; uint32_t base_level:5; uint32_t pad1:1; uint32_t lod_preclamp:1; uint32_t default_color_mode:1; uint32_t pad0:1; uint32_t disable:1; } ss0; struct { uint32_t cube_control_mode:1; uint32_t shadow_function:3; uint32_t pad:4; uint32_t max_lod:12; uint32_t min_lod:12; } ss1; struct { uint32_t pad:5; uint32_t default_color_pointer:27; } ss2; struct { uint32_t r_wrap_mode:3; uint32_t t_wrap_mode:3; uint32_t s_wrap_mode:3; uint32_t pad:1; uint32_t non_normalized_coord:1; uint32_t trilinear_quality:2; uint32_t address_round:6; uint32_t max_aniso:3; uint32_t chroma_key_mode:1; uint32_t chroma_key_index:2; uint32_t chroma_key_enable:1; uint32_t pad0:6; } ss3; } gen7_sampler_state_t; STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen7_sampler_state_t)); #undef BITFIELD_BIT #undef BITFIELD_RANGE #endif /* __INTEL_STRUCTS_H__ */ Release_v0.3/src/kernels/000077500000000000000000000000001223142177000154355ustar00rootroot00000000000000Release_v0.3/src/kernels/cl_internal_copy_buf_align1.cl000066400000000000000000000004371223142177000233740ustar00rootroot00000000000000kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset, global char* dst, unsigned int dst_offset, unsigned int size) { int i = get_global_id(0); if (i < size) dst[i+dst_offset] = src[i+src_offset]; } Release_v0.3/src/kernels/cl_internal_copy_buf_align16.cl000066400000000000000000000007271223142177000234640ustar00rootroot00000000000000kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { int i = get_global_id(0) * 4; if (i < size*4) { dst[i+dst_offset] = src[i+src_offset]; dst[i+dst_offset + 1] = src[i+src_offset + 1]; dst[i+dst_offset + 2] = 
src[i+src_offset + 2]; dst[i+dst_offset + 3] = src[i+src_offset + 3]; } } Release_v0.3/src/kernels/cl_internal_copy_buf_align4.cl000066400000000000000000000004411223142177000233720ustar00rootroot00000000000000kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { int i = get_global_id(0); if (i < size) dst[i+dst_offset] = src[i+src_offset]; } Release_v0.3/src/x11/000077500000000000000000000000001223142177000144035ustar00rootroot00000000000000Release_v0.3/src/x11/dricommon.c000066400000000000000000000210701223142177000165360ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia * Note: the code is taken from libva code base */ /* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include "x11/va_dri2.h" #include "x11/va_dri2tokens.h" #include "x11/dricommon.h" #include "cl_utils.h" #include "cl_alloc.h" #include #include #include #include #define LOCAL __attribute__ ((visibility ("internal"))) LOCAL dri_drawable_t* dri_state_do_drawable_hash(dri_state_t *state, XID drawable) { int index = drawable % DRAWABLE_HASH_SZ; struct dri_drawable *dri_drawable = state->drawable_hash[index]; while (dri_drawable) { if (dri_drawable->x_drawable == drawable) return dri_drawable; dri_drawable = dri_drawable->next; } dri_drawable = dri_state_create_drawable(state, drawable); dri_drawable->x_drawable = drawable; dri_drawable->next = state->drawable_hash[index]; state->drawable_hash[index] = dri_drawable; return dri_drawable; } LOCAL void dri_state_free_drawable_hash(dri_state_t *state) { int i; struct dri_drawable *dri_drawable, *prev; for (i = 0; i < DRAWABLE_HASH_SZ; i++) { dri_drawable = state->drawable_hash[i]; while (dri_drawable) { prev = dri_drawable; dri_drawable = prev->next; dri_state_destroy_drawable(state, prev); } } } LOCAL dri_drawable_t* dri_state_get_drawable(dri_state_t *state, XID drawable) { return dri_state_do_drawable_hash(state, drawable); } LOCAL void dri_state_init_drawable_hash_table(dri_state_t *state) { int i; for(i=0; i < DRAWABLE_HASH_SZ; i++) state->drawable_hash[i] = NULL; } LOCAL void dri_state_delete(dri_state_t *state) { if (state == NULL) return; dri_state_close(state); cl_free(state); } LOCAL dri_state_t* dri_state_new(void) { dri_state_t *state = NULL; TRY_ALLOC_NO_ERR (state, CALLOC(dri_state_t)); state->fd = -1; state->driConnectedFlag = NONE; dri_state_init_drawable_hash_table(state); exit: return state; error: dri_state_delete(state); state = NULL; goto exit; } #define __DRI_BUFFER_FRONT_LEFT 0 #define __DRI_BUFFER_BACK_LEFT 1 #define __DRI_BUFFER_FRONT_RIGHT 2 #define __DRI_BUFFER_BACK_RIGHT 3 #define __DRI_BUFFER_DEPTH 4 #define __DRI_BUFFER_STENCIL 5 #define __DRI_BUFFER_ACCUM 6 #define 
__DRI_BUFFER_FAKE_FRONT_LEFT 7 #define __DRI_BUFFER_FAKE_FRONT_RIGHT 8 typedef struct dri2_drawable { struct dri_drawable base; union dri_buffer buffers[5]; int width; int height; int has_backbuffer; int back_index; int front_index; } dri2_drawable_t; LOCAL dri_drawable_t* dri_state_create_drawable(dri_state_t *state, XID x_drawable) { dri2_drawable_t *dri2_drwble; dri2_drwble = (dri2_drawable_t*)calloc(1, sizeof(*dri2_drwble)); if (!dri2_drwble) return NULL; dri2_drwble->base.x_drawable = x_drawable; dri2_drwble->base.x = 0; dri2_drwble->base.y = 0; VA_DRI2CreateDrawable(state->x11_dpy, x_drawable); return &dri2_drwble->base; } LOCAL void dri_state_destroy_drawable(dri_state_t *state, dri_drawable_t *dri_drwble) { VA_DRI2DestroyDrawable(state->x11_dpy, dri_drwble->x_drawable); free(dri_drwble); } LOCAL void dri_state_swap_buffer(dri_state_t *state, dri_drawable_t *dri_drwble) { dri2_drawable_t *dri2_drwble = (dri2_drawable_t*)dri_drwble; XRectangle xrect; XserverRegion region; if (dri2_drwble->has_backbuffer) { xrect.x = 0; xrect.y = 0; xrect.width = dri2_drwble->width; xrect.height = dri2_drwble->height; region = XFixesCreateRegion(state->x11_dpy, &xrect, 1); VA_DRI2CopyRegion(state->x11_dpy, dri_drwble->x_drawable, region, DRI2BufferFrontLeft, DRI2BufferBackLeft); XFixesDestroyRegion(state->x11_dpy, region); } } LOCAL union dri_buffer* dri_state_get_rendering_buffer(dri_state_t *state, dri_drawable_t *dri_drwble) { dri2_drawable_t *dri2_drwble = (dri2_drawable_t *)dri_drwble; int i; int count; unsigned int attachments[5]; VA_DRI2Buffer *buffers; i = 0; attachments[i++] = __DRI_BUFFER_BACK_LEFT; attachments[i++] = __DRI_BUFFER_FRONT_LEFT; buffers = VA_DRI2GetBuffers(state->x11_dpy, dri_drwble->x_drawable, &dri2_drwble->width, &dri2_drwble->height, attachments, i, &count); assert(buffers); if (buffers == NULL) return NULL; dri2_drwble->has_backbuffer = 0; for (i = 0; i < count; i++) { dri2_drwble->buffers[i].dri2.attachment = buffers[i].attachment; 
dri2_drwble->buffers[i].dri2.name = buffers[i].name; dri2_drwble->buffers[i].dri2.pitch = buffers[i].pitch; dri2_drwble->buffers[i].dri2.cpp = buffers[i].cpp; dri2_drwble->buffers[i].dri2.flags = buffers[i].flags; if (buffers[i].attachment == __DRI_BUFFER_BACK_LEFT) { dri2_drwble->has_backbuffer = 1; dri2_drwble->back_index = i; } if (buffers[i].attachment == __DRI_BUFFER_FRONT_LEFT) dri2_drwble->front_index = i; } dri_drwble->width = dri2_drwble->width; dri_drwble->height = dri2_drwble->height; Xfree(buffers); if (dri2_drwble->has_backbuffer) return &dri2_drwble->buffers[dri2_drwble->back_index]; return &dri2_drwble->buffers[dri2_drwble->front_index]; } LOCAL void dri_state_close(dri_state_t *state) { dri_state_free_drawable_hash(state); assert(state->fd >= 0); close(state->fd); } LOCAL void dri_state_release(dri_state_t *state) { dri_state_delete(state); } LOCAL dri_state_t* getDRI2State(Display* dpy, int screen, char **driver_name) { int major, minor; int error_base; int event_base; char *device_name = NULL; drm_magic_t magic; char * internal_driver_name = NULL; int fd = -1; dri_state_t* state = NULL; if (!VA_DRI2QueryExtension(dpy, &event_base, &error_base)) goto err_out; if (!VA_DRI2QueryVersion(dpy, &major, &minor)) goto err_out; if (!VA_DRI2Connect(dpy, RootWindow(dpy, screen), &internal_driver_name, &device_name)) goto err_out; fd = open(device_name, O_RDWR); assert(fd >= 0); if (fd < 0) goto err_out; if (drmGetMagic(fd, &magic)) goto err_out; if (!VA_DRI2Authenticate(dpy, RootWindow(dpy, screen), magic)) goto err_out; if(driver_name) *driver_name = internal_driver_name; else Xfree(internal_driver_name); state = dri_state_new(); state->fd = fd; state->x11_dpy = dpy; state->x11_screen = screen; state->driConnectedFlag = DRI2; if (device_name) Xfree(device_name); return state; err_out: if (device_name) Xfree(device_name); if (internal_driver_name) Xfree(internal_driver_name); if(driver_name) *driver_name = NULL; if (fd >= 0) close(fd); if (driver_name) 
*driver_name = NULL; return state; } Release_v0.3/src/x11/dricommon.h000066400000000000000000000060111223142177000165410ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia * Note: the code is taken from libva code base */ /* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef _VA_DRICOMMON_H_ #define _VA_DRICOMMON_H_ #include #include #include #include union dri_buffer { struct { unsigned int attachment; unsigned int name; unsigned int pitch; unsigned int cpp; unsigned int flags; } dri2; }; typedef struct dri_drawable { XID x_drawable; int x; int y; unsigned int width; unsigned int height; struct dri_drawable *next; } dri_drawable_t; #define DRAWABLE_HASH_SZ 32 enum DRI_VER { NONE = 0, // NOT supported VA_DRI1 = 1, DRI2 = 2 }; typedef struct dri_state { Display *x11_dpy; int x11_screen; int fd; enum DRI_VER driConnectedFlag; /* 0: disconnected, 2: DRI2 */ dri_drawable_t *drawable_hash[DRAWABLE_HASH_SZ]; } dri_state_t; dri_drawable_t *dri_state_create_drawable(dri_state_t*, XID x_drawable); void dri_state_destroy_drawable(dri_state_t*, dri_drawable_t*); void dri_state_close(dri_state_t*); void dri_state_release(dri_state_t*); // Create a dri2 state from dpy and screen dri_state_t *getDRI2State(Display* dpy, int screen, char **driver_name); #endif /* _VA_DRICOMMON_H_ */ Release_v0.3/src/x11/mesa_egl_extension.c000066400000000000000000000211071223142177000204200ustar00rootroot00000000000000#include #include "mesa_egl_extension.h" #include "mesa_egl_res_share.h" #include "src/cl_driver.h" struct _egl_display; struct _egl_resource; struct _egl_thread_info; struct _egl_config; struct _egl_surface; struct _egl_driver; typedef struct _egl_display _EGLDisplay; typedef struct _egl_resource _EGLResource; typedef struct _egl_thread_info _EGLThreadInfo; typedef struct _egl_config _EGLConfig; typedef struct _egl_surface _EGLSurface; typedef struct _egl_driver _EGLDriver; /** * A resource of a display. */ struct _egl_resource { /* which display the resource belongs to */ _EGLDisplay *Display; EGLBoolean IsLinked; EGLint RefCount; /* used to link resources of the same type */ _EGLResource *Next; }; /** * "Base" class for device driver contexts. 
*/ struct _egl_context { /* A context is a display resource */ _EGLResource Resource; /* The bound status of the context */ _EGLThreadInfo *Binding; _EGLSurface *DrawSurface; _EGLSurface *ReadSurface; _EGLConfig *Config; EGLint ClientAPI; /**< EGL_OPENGL_ES_API, EGL_OPENGL_API, EGL_OPENVG_API */ EGLint ClientMajorVersion; EGLint ClientMinorVersion; EGLint Flags; EGLint Profile; EGLint ResetNotificationStrategy; /* The real render buffer when a window surface is bound */ EGLint WindowRenderBuffer; }; typedef struct _egl_context _EGLContext; struct dri2_egl_display { int dri2_major; int dri2_minor; __DRIscreen *dri_screen; int own_dri_screen; const __DRIconfig **driver_configs; void *driver; }; enum _egl_platform_type { _EGL_PLATFORM_WINDOWS, _EGL_PLATFORM_X11, _EGL_PLATFORM_WAYLAND, _EGL_PLATFORM_DRM, _EGL_PLATFORM_FBDEV, _EGL_PLATFORM_NULL, _EGL_PLATFORM_ANDROID, _EGL_NUM_PLATFORMS, _EGL_INVALID_PLATFORM = -1 }; typedef enum _egl_platform_type _EGLPlatformType; typedef pthread_mutex_t _EGLMutex; struct _egl_display { /* used to link displays */ _EGLDisplay *Next; _EGLMutex Mutex; _EGLPlatformType Platform; /**< The type of the platform display */ void *PlatformDisplay; /**< A pointer to the platform display */ _EGLDriver *Driver; /**< Matched driver of the display */ EGLBoolean Initialized; /**< True if the display is initialized */ /* options that affect how the driver initializes the display */ struct { EGLBoolean TestOnly; /**< Driver should not set fields when true */ EGLBoolean UseFallback; /**< Use fallback driver (sw or less features) */ } Options; /* these fields are set by the driver during init */ void *DriverData; /**< Driver private data */ }; static struct dri2_egl_display * dri2_egl_display(_EGLDisplay *dpy) { return (struct dri2_egl_display *)dpy->DriverData; } static _EGLDisplay * _eglLockDisplay(EGLDisplay dpy) { return (_EGLDisplay *)dpy; } static _EGLContext * _eglLookupContext(EGLContext ctx, EGLDisplay disp) { disp = disp; return (_EGLContext 
*) ctx; } struct dri2_egl_context { _EGLContext base; __DRIcontext *dri_context; }; static struct dri2_egl_context * dri2_egl_context(_EGLContext *ctx) { return (struct dri2_egl_context *)ctx; } static EGLBoolean dri2_acquire_texture(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list, void *user_data) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint texture = 0; GLenum gl_target = 0; GLint level = 0; GLboolean ret; if (_eglParseTextureAttribList(&texture, &gl_target, &level, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_acquire_texture(dri2_dpy->driver, dri2_ctx->dri_context, gl_target, level, texture, user_data); return ret; } static EGLBoolean dri2_release_texture(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint texture = 0; GLenum gl_target = 0; GLint level = 0; GLboolean ret; if (_eglParseTextureAttribList(&texture, &gl_target, &level, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_release_texture(dri2_dpy->driver, dri2_ctx->dri_context, gl_target, level, texture); return ret; } static EGLBoolean dri2_acquire_buffer_object(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list, void *user_data) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint bufobj = 0; GLboolean ret; if (_eglParseBufferObjAttribList(&bufobj, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_acquire_buffer_object(dri2_dpy->driver, dri2_ctx->dri_context, bufobj, user_data); return ret; } static EGLBoolean dri2_release_buffer_object(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint bufobj = 0; GLboolean ret; if 
(_eglParseBufferObjAttribList(&bufobj, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_release_buffer_object(dri2_dpy->driver, dri2_ctx->dri_context, bufobj); return ret; } static EGLBoolean dri2_acquire_render_buffer(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list, void *user_data) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint rb = 0; GLboolean ret; if (_eglParseBufferObjAttribList(&rb, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_acquire_render_buffer(dri2_dpy->driver, dri2_ctx->dri_context, rb, user_data); return ret; } static EGLBoolean dri2_release_render_buffer(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint rb = 0; GLboolean ret; if (_eglParseBufferObjAttribList(&rb, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_release_render_buffer(dri2_dpy->driver, dri2_ctx->dri_context, rb); return ret; } static EGLBoolean dri2_acquire_resource_mesa(_EGLDisplay *disp, _EGLContext *ctx, const EGLenum target, const EGLint *attrib_list, void *user_data) { switch (target) { case EGL_GL_TEXTURE_MESA: return dri2_acquire_texture(disp, ctx, attrib_list, user_data); case EGL_GL_BUFFER_OBJECT_MESA: return dri2_acquire_buffer_object(disp, ctx, attrib_list, user_data); case EGL_GL_RENDER_BUFFER_MESA: return dri2_acquire_render_buffer(disp, ctx, attrib_list, user_data); default: fprintf(stderr, "bad resource target value 0x%04x", target); } return EGL_FALSE; } static EGLBoolean dri2_release_resource_mesa(_EGLDisplay *disp, _EGLContext *ctx, const EGLenum target, const EGLint *attrib_list) { switch (target) { case EGL_GL_TEXTURE_MESA: return dri2_release_texture(disp, ctx, attrib_list); case EGL_GL_BUFFER_OBJECT_MESA: return dri2_release_buffer_object(disp, ctx, attrib_list); case EGL_GL_RENDER_BUFFER_MESA: return 
dri2_release_render_buffer(disp, ctx, attrib_list); default: fprintf(stderr, "bad resource target value 0x%04x", target); } return EGL_FALSE; } EGLBoolean eglAcquireResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list, void *user) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLContext *context = _eglLookupContext(ctx, disp); return dri2_acquire_resource_mesa(disp, context, target, attrib_list, user); } EGLBoolean eglReleaseResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLContext *context = _eglLookupContext(ctx, disp); return dri2_release_resource_mesa(disp, context, target, attrib_list); } Release_v0.3/src/x11/mesa_egl_extension.h000066400000000000000000000020211223142177000204170ustar00rootroot00000000000000#ifndef __MESA_EGL_EXTENSION_H__ #define __MESA_EGL_EXTENSION_H__ #include #include #include #define EGL_GL_TEXTURE_MESA 0x3300 /* eglAcuireResource target */ #define EGL_GL_BUFFER_OBJECT_MESA 0x3301 /* eglAcuireResource target */ #define EGL_GL_RENDER_BUFFER_MESA 0x3302 /* eglAcuireResource target */ #define EGL_GL_TEXTURE_ID_MESA 0x3303 /* eglAcuireResource attribute */ #define EGL_GL_TEXTURE_LEVEL_MESA 0x3304 /* eglAcuireResource attribute */ #define EGL_GL_TEXTURE_TARGET_MESA 0x3305 /* eglAcuireResource attribute */ #define EGL_GL_BUFFER_OBJECT_ID_MESA 0x3306 /* eglAcuireResource attribute */ #define EGL_GL_RENDER_BUFFER_ID_MESA 0x3307 /* eglAcuireResource attribute */ EGLBoolean eglAcquireResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list, void * user_data); EGLBoolean eglReleaseResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list); #endif Release_v0.3/src/x11/mesa_egl_res_share.c000066400000000000000000000071331223142177000203620ustar00rootroot00000000000000/************************************************************************** * * Copyright 2013-2014 Zhigang 
Gong * Copyright 2013-2014 Intel, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * **************************************************************************/ #include #include #include "mesa_egl_extension.h" #include "mesa_egl_res_share.h" /** * Parse the list of share texture attributes and return the proper error code. 
*/ EGLint _eglParseTextureAttribList(unsigned int *texture, EGLenum *gl_target, EGLint *level, const EGLint *attrib_list) { EGLint i, err = EGL_SUCCESS; *texture = 0; *gl_target = 0; *level = 0; if (!attrib_list) return EGL_BAD_ATTRIBUTE; for (i = 0; attrib_list[i] != EGL_NONE; i++) { EGLint attr = attrib_list[i++]; EGLint val = attrib_list[i]; switch (attr) { case EGL_GL_TEXTURE_LEVEL_MESA: *level = val; break; case EGL_GL_TEXTURE_ID_MESA: *texture = val; break; case EGL_GL_TEXTURE_TARGET_MESA: *gl_target = val; break; default: /* unknown attrs are ignored */ break; } } return err; } /** * Parse the list of share texture attributes and return the proper error code. */ EGLint _eglParseBufferObjAttribList(unsigned int *bufobj, const EGLint *attrib_list) { EGLint i, err = EGL_SUCCESS; *bufobj = 0; if (!attrib_list) return EGL_BAD_ATTRIBUTE; for (i = 0; attrib_list[i] != EGL_NONE; i++) { EGLint attr = attrib_list[i++]; EGLint val = attrib_list[i]; switch (attr) { case EGL_GL_BUFFER_OBJECT_ID_MESA: *bufobj = val; break; default: /* unknown attrs are ignored */ break; } } if (*bufobj == 0) err = EGL_BAD_ATTRIBUTE; return err; } /** * Parse the list of share texture attributes and return the proper error code. */ EGLint _eglParseRenderBufferAttribList(unsigned int *rb, const EGLint *attrib_list) { EGLint i, err = EGL_SUCCESS; *rb = 0; if (!attrib_list) return EGL_BAD_ATTRIBUTE; for (i = 0; attrib_list[i] != EGL_NONE; i++) { EGLint attr = attrib_list[i++]; EGLint val = attrib_list[i]; switch (attr) { case EGL_GL_RENDER_BUFFER_ID_MESA: *rb = val; break; default: /* unknown attrs are ignored */ break; } } if (*rb == 0) err = EGL_BAD_ATTRIBUTE; return err; } Release_v0.3/src/x11/mesa_egl_res_share.h000066400000000000000000000034551223142177000203720ustar00rootroot00000000000000/************************************************************************** * * Copyright 2013-2014 Zhigang Gong * Copyright 2013-2014 Intel, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * **************************************************************************/ #ifndef EGLRESSHARE_INCLUDED #define EGLRESSHARE_INCLUDED #include EGLint _eglParseTextureAttribList(unsigned int *texture, EGLenum *gl_target, EGLint *level, const EGLint *attrib_list); EGLint _eglParseBufferObjAttribList(unsigned int *bufobj, const EGLint *attrib_list); EGLint _eglParseRenderBufferAttribList(unsigned int *rb, const EGLint *attrib_list); #endif Release_v0.3/src/x11/va_dri2.c000066400000000000000000000224501223142177000161000ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2008 Red Hat, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. 
* * Authors: * Kristian Hgsberg (krh@redhat.com) */ #define NEED_REPLIES #include #include #include #include "xf86drm.h" #include "x11/va_dri2.h" #include "x11/va_dri2str.h" #include "x11/va_dri2tokens.h" #ifndef DRI2DriverDRI #define DRI2DriverDRI 0 #endif #define LOCAL __attribute__ ((visibility ("internal"))) static char va_dri2ExtensionName[] = DRI2_NAME; static XExtensionInfo _va_dri2_info_data; static XExtensionInfo *va_dri2Info = &_va_dri2_info_data; static XEXT_GENERATE_CLOSE_DISPLAY (VA_DRI2CloseDisplay, va_dri2Info) static /* const */ XExtensionHooks va_dri2ExtensionHooks = { NULL, /* create_gc */ NULL, /* copy_gc */ NULL, /* flush_gc */ NULL, /* free_gc */ NULL, /* create_font */ NULL, /* free_font */ VA_DRI2CloseDisplay, /* close_display */ NULL, /* wire_to_event */ NULL, /* event_to_wire */ NULL, /* error */ NULL, /* error_string */ }; static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay, va_dri2Info, va_dri2ExtensionName, &va_dri2ExtensionHooks, 0, NULL) LOCAL Bool VA_DRI2QueryExtension(Display *dpy, int *eventBase, int *errorBase) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); if (XextHasExtension(info)) { *eventBase = info->codes->first_event; *errorBase = info->codes->first_error; return True; } return False; } LOCAL Bool VA_DRI2QueryVersion(Display *dpy, int *major, int *minor) { XExtDisplayInfo *info = DRI2FindDisplay (dpy); xDRI2QueryVersionReply rep; xDRI2QueryVersionReq *req; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReq(DRI2QueryVersion, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2QueryVersion; req->majorVersion = DRI2_MAJOR; req->minorVersion = DRI2_MINOR; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return False; } *major = rep.majorVersion; *minor = rep.minorVersion; UnlockDisplay(dpy); SyncHandle(); return True; } LOCAL Bool VA_DRI2Connect(Display *dpy, XID window, char **driverName, char **deviceName) { XExtDisplayInfo *info = 
DRI2FindDisplay(dpy); xDRI2ConnectReply rep; xDRI2ConnectReq *req; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReq(DRI2Connect, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2Connect; req->window = window; req->drivertype = DRI2DriverDRI; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return False; } if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) { UnlockDisplay(dpy); SyncHandle(); return False; } *driverName = Xmalloc(rep.driverNameLength + 1); if (*driverName == NULL) { _XEatData(dpy, ((rep.driverNameLength + 3) & ~3) + ((rep.deviceNameLength + 3) & ~3)); UnlockDisplay(dpy); SyncHandle(); return False; } _XReadPad(dpy, *driverName, rep.driverNameLength); (*driverName)[rep.driverNameLength] = '\0'; *deviceName = Xmalloc(rep.deviceNameLength + 1); if (*deviceName == NULL) { Xfree(*driverName); _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3)); UnlockDisplay(dpy); SyncHandle(); return False; } _XReadPad(dpy, *deviceName, rep.deviceNameLength); (*deviceName)[rep.deviceNameLength] = '\0'; UnlockDisplay(dpy); SyncHandle(); return True; } LOCAL Bool VA_DRI2Authenticate(Display *dpy, XID window, drm_magic_t magic) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2AuthenticateReq *req; xDRI2AuthenticateReply rep; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReq(DRI2Authenticate, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2Authenticate; req->window = window; req->magic = magic; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return False; } UnlockDisplay(dpy); SyncHandle(); return rep.authenticated; } LOCAL void VA_DRI2CreateDrawable(Display *dpy, XID drawable) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2CreateDrawableReq *req; XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName); LockDisplay(dpy); GetReq(DRI2CreateDrawable, req); req->reqType = 
info->codes->major_opcode; req->dri2Reqtype = X_DRI2CreateDrawable; req->drawable = drawable; UnlockDisplay(dpy); SyncHandle(); } LOCAL void VA_DRI2DestroyDrawable(Display *dpy, XID drawable) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2DestroyDrawableReq *req; XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName); XSync(dpy, False); LockDisplay(dpy); GetReq(DRI2DestroyDrawable, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2DestroyDrawable; req->drawable = drawable; UnlockDisplay(dpy); SyncHandle(); } LOCAL VA_DRI2Buffer *VA_DRI2GetBuffers(Display *dpy, XID drawable, int *width, int *height, unsigned int *attachments, int count, int *outcount) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2GetBuffersReply rep; xDRI2GetBuffersReq *req; VA_DRI2Buffer *buffers; xDRI2Buffer repBuffer; CARD32 *p; int i; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReqExtra(DRI2GetBuffers, count * 4, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2GetBuffers; req->drawable = drawable; req->count = count; p = (CARD32 *) &req[1]; for (i = 0; i < count; i++) p[i] = attachments[i]; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return NULL; } *width = rep.width; *height = rep.height; *outcount = rep.count; buffers = Xmalloc(rep.count * sizeof buffers[0]); if (buffers == NULL) { _XEatData(dpy, rep.count * sizeof repBuffer); UnlockDisplay(dpy); SyncHandle(); return NULL; } for (i = 0; i < (int) rep.count; i++) { _XReadPad(dpy, (char *) &repBuffer, sizeof repBuffer); buffers[i].attachment = repBuffer.attachment; buffers[i].name = repBuffer.name; buffers[i].pitch = repBuffer.pitch; buffers[i].cpp = repBuffer.cpp; buffers[i].flags = repBuffer.flags; } UnlockDisplay(dpy); SyncHandle(); return buffers; } LOCAL void VA_DRI2CopyRegion(Display *dpy, XID drawable, XserverRegion region, CARD32 dest, CARD32 src) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); 
xDRI2CopyRegionReq *req; xDRI2CopyRegionReply rep; XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName); LockDisplay(dpy); GetReq(DRI2CopyRegion, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2CopyRegion; req->drawable = drawable; req->region = region; req->dest = dest; req->src = src; _XReply(dpy, (xReply *)&rep, 0, xFalse); UnlockDisplay(dpy); SyncHandle(); } Release_v0.3/src/x11/va_dri2.h000066400000000000000000000065761223142177000161200ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2007,2008 Red Hat, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. * * Authors: * Kristian Hgsberg (krh@redhat.com) */ #ifndef _VA_DRI2_H_ #define _VA_DRI2_H_ #include #include #include typedef struct { unsigned int attachment; unsigned int name; unsigned int pitch; unsigned int cpp; unsigned int flags; } VA_DRI2Buffer; extern Bool VA_DRI2QueryExtension(Display *display, int *eventBase, int *errorBase); extern Bool VA_DRI2QueryVersion(Display *display, int *major, int *minor); extern Bool VA_DRI2Connect(Display *display, XID window, char **driverName, char **deviceName); extern Bool VA_DRI2Authenticate(Display *display, XID window, drm_magic_t magic); extern void VA_DRI2CreateDrawable(Display *display, XID drawable); extern void VA_DRI2DestroyDrawable(Display *display, XID handle); extern VA_DRI2Buffer * VA_DRI2GetBuffers(Display *dpy, XID drawable, int *width, int *height, unsigned int *attachments, int count, int *outcount); #if 1 extern void VA_DRI2CopyRegion(Display *dpy, XID drawable, XserverRegion region, CARD32 dest, CARD32 src); #endif #endif Release_v0.3/src/x11/va_dri2str.h000066400000000000000000000135351223142177000166420ustar00rootroot00000000000000/* * Copyright © 2012 
Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2008 Red Hat, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. 
* * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. * * Authors: * Kristian Hgsberg (krh@redhat.com) */ #ifndef _DRI2_PROTO_H_ #define _DRI2_PROTO_H_ #define DRI2_NAME "DRI2" #define DRI2_MAJOR 1 #define DRI2_MINOR 0 #define DRI2NumberErrors 0 #define DRI2NumberEvents 0 #define DRI2NumberRequests 7 #define X_DRI2QueryVersion 0 #define X_DRI2Connect 1 #define X_DRI2Authenticate 2 #define X_DRI2CreateDrawable 3 #define X_DRI2DestroyDrawable 4 #define X_DRI2GetBuffers 5 #define X_DRI2CopyRegion 6 typedef struct { CARD32 attachment B32; CARD32 name B32; CARD32 pitch B32; CARD32 cpp B32; CARD32 flags B32; } xDRI2Buffer; typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 majorVersion B32; CARD32 minorVersion B32; } xDRI2QueryVersionReq; #define sz_xDRI2QueryVersionReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 majorVersion B32; CARD32 minorVersion B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; } xDRI2QueryVersionReply; #define sz_xDRI2QueryVersionReply 32 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 window B32; CARD32 drivertype B32; } xDRI2ConnectReq; #define sz_xDRI2ConnectReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 driverNameLength B32; CARD32 deviceNameLength B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; } xDRI2ConnectReply; #define sz_xDRI2ConnectReply 32 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 window B32; CARD32 magic B32; } xDRI2AuthenticateReq; #define sz_xDRI2AuthenticateReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 authenticated 
B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; CARD32 pad6 B32; } xDRI2AuthenticateReply; #define sz_xDRI2AuthenticateReply 32 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 drawable B32; } xDRI2CreateDrawableReq; #define sz_xDRI2CreateDrawableReq 8 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 drawable B32; } xDRI2DestroyDrawableReq; #define sz_xDRI2DestroyDrawableReq 8 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 drawable B32; CARD32 count B32; } xDRI2GetBuffersReq; #define sz_xDRI2GetBuffersReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 width B32; CARD32 height B32; CARD32 count B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; } xDRI2GetBuffersReply; #define sz_xDRI2GetBuffersReply 32 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 drawable B32; CARD32 region B32; CARD32 dest B32; CARD32 src B32; } xDRI2CopyRegionReq; #define sz_xDRI2CopyRegionReq 20 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; CARD32 pad6 B32; CARD32 pad7 B32; } xDRI2CopyRegionReply; #define sz_xDRI2CopyRegionReply 32 #endif Release_v0.3/src/x11/va_dri2tokens.h000066400000000000000000000052731223142177000173350ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2008 Red Hat, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. 
# Build script for the unit tests.
#
# Produces:
#   utests              shared library holding every registered test case
#   utest_run           test runner linked against utests
#   flat_address_space  stand-alone runtime test linked against utests
#   kernel_bin.bin      custom target that pre-compiles compiler_ceil.cl

include_directories(${CMAKE_CURRENT_SOURCE_DIR}
                    ${CMAKE_CURRENT_SOURCE_DIR}/../include)

link_directories(${LLVM_LIBRARY_DIR})

# One translation unit per test case, plus the small test framework at the end.
set(utests_sources
  cl_create_kernel.cpp
  utest_error.c
  compiler_basic_arithmetic.cpp
  compiler_displacement_map_element.cpp
  compiler_shader_toy.cpp
  compiler_mandelbrot.cpp
  compiler_mandelbrot_alternate.cpp
  compiler_box_blur_float.cpp
  compiler_box_blur_image.cpp
  compiler_box_blur.cpp
  compiler_insert_to_constant.cpp
  compiler_argument_structure.cpp
  compiler_arith_shift_right.cpp
  compiler_array0.cpp
  compiler_array.cpp
  compiler_array1.cpp
  compiler_array2.cpp
  compiler_array3.cpp
  compiler_byte_scatter.cpp
  compiler_ceil.cpp
  compiler_clz_short.cpp
  compiler_clz_int.cpp
  compiler_convert_uchar_sat.cpp
  compiler_copy_buffer.cpp
  compiler_copy_image.cpp
  compiler_copy_image_3d.cpp
  compiler_copy_buffer_row.cpp
  compiler_degrees.cpp
  compiler_step.cpp
  compiler_fabs.cpp
  compiler_abs.cpp
  compiler_abs_diff.cpp
  compiler_fill_image.cpp
  compiler_fill_image0.cpp
  compiler_fill_image_3d.cpp
  compiler_fill_image_3d_2.cpp
  compiler_function_argument0.cpp
  compiler_function_argument1.cpp
  compiler_function_argument2.cpp
  compiler_function_argument.cpp
  compiler_function_constant0.cpp
  compiler_function_constant1.cpp
  compiler_function_constant.cpp
  compiler_global_constant.cpp
  compiler_global_constant_2.cpp
  compiler_group_size.cpp
  compiler_hadd.cpp
  compiler_if_else.cpp
  compiler_integer_division.cpp
  compiler_integer_remainder.cpp
  compiler_insert_vector.cpp
  compiler_lower_return0.cpp
  compiler_lower_return1.cpp
  compiler_lower_return2.cpp
  compiler_mad_hi.cpp
  compiler_mul_hi.cpp
  compiler_mad24.cpp
  compiler_mul24.cpp
  compiler_multiple_kernels.cpp
  compiler_radians.cpp
  compiler_rhadd.cpp
  compiler_rotate.cpp
  compiler_saturate.cpp
  compiler_saturate_sub.cpp
  compiler_shift_right.cpp
  compiler_short_scatter.cpp
  compiler_smoothstep.cpp
  compiler_uint2_copy.cpp
  compiler_uint3_copy.cpp
  compiler_uint8_copy.cpp
  compiler_uint16_copy.cpp
  compiler_uint3_unaligned_copy.cpp
  compiler_upsample_int.cpp
  compiler_upsample_long.cpp
  compiler_unstructured_branch0.cpp
  compiler_unstructured_branch1.cpp
  compiler_unstructured_branch2.cpp
  compiler_unstructured_branch3.cpp
  compiler_write_only_bytes.cpp
  compiler_write_only.cpp
  compiler_write_only_shorts.cpp
  compiler_switch.cpp
  compiler_math.cpp
  compiler_atomic_functions.cpp
  compiler_async_copy.cpp
  compiler_async_stride_copy.cpp
  compiler_insn_selection_min.cpp
  compiler_insn_selection_max.cpp
  compiler_insn_selection_masked_min_max.cpp
  compiler_load_bool_imm.cpp
  compiler_global_memory_barrier.cpp
  compiler_local_memory_two_ptr.cpp
  compiler_local_memory_barrier.cpp
  compiler_local_memory_barrier_wg64.cpp
  compiler_local_memory_barrier_2.cpp
  compiler_local_slm.cpp
  compiler_movforphi_undef.cpp
  compiler_volatile.cpp
  compiler_copy_image1.cpp
  compiler_get_image_info.cpp
  compiler_vect_compare.cpp
  compiler_vector_load_store.cpp
  compiler_vector_inc.cpp
  compiler_cl_finish.cpp
  get_cl_info.cpp
  builtin_atan2.cpp
  builtin_bitselect.cpp
  builtin_frexp.cpp
  builtin_mad_sat.cpp
  builtin_modf.cpp
  builtin_nextafter.cpp
  builtin_remquo.cpp
  builtin_shuffle.cpp
  builtin_shuffle2.cpp
  builtin_sign.cpp
  builtin_sinpi.cpp
  builtin_lgamma.cpp
  builtin_lgamma_r.cpp
  builtin_tgamma.cpp
  buildin_work_dim.cpp
  builtin_global_size.cpp
  builtin_local_size.cpp
  builtin_global_id.cpp
  builtin_num_groups.cpp
  builtin_local_id.cpp
  builtin_acos_asin.cpp
  builtin_convert_sat.cpp
  runtime_createcontext.cpp
  runtime_null_kernel_arg.cpp
  runtime_event.cpp
  compiler_double.cpp
  compiler_double_2.cpp
  compiler_double_3.cpp
  compiler_double_4.cpp
  compiler_long.cpp
  compiler_long_2.cpp
  compiler_long_convert.cpp
  compiler_long_shl.cpp
  compiler_long_shr.cpp
  compiler_long_asr.cpp
  compiler_long_mult.cpp
  compiler_long_cmp.cpp
  compiler_bool_cross_basic_block.cpp
  load_program_from_bin.cpp
  enqueue_copy_buf.cpp
  utest_assert.cpp
  utest.cpp
  utest_file_map.cpp
  utest_helper.cpp)

# Pre-compiled kernel binary, generated next to its .cl source.
# NOTE(review): this writes into the source tree; presumably the test that
# loads the binary looks for it beside the .cl file -- confirm before moving
# the output into the build tree.
set(kernel_bin ${CMAKE_CURRENT_SOURCE_DIR}/../kernels/compiler_ceil)

# NOTE(review): the generator is referenced by its path in the build tree; if
# it is built by a target of the same name, using that target name in COMMAND
# and DEPENDS would give a real target-level dependency -- confirm.
add_custom_command(
  OUTPUT "${kernel_bin}.bin"
  COMMAND "${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater"
          "${kernel_bin}.cl" "-o${kernel_bin}.bin"
  DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater"
          "${kernel_bin}.cl"
  COMMENT "Generating ${kernel_bin}.bin"
  VERBATIM)

add_custom_target(kernel_bin.bin
  DEPENDS "${kernel_bin}.bin")

# The GL-sharing test needs both EGL and the Mesa sources.
if(EGL_FOUND AND MESA_SOURCE_FOUND)
  list(APPEND utests_sources compiler_fill_gl_image.cpp)
  # Applies to the C and C++ sources of this directory, as the former edits
  # of CMAKE_C_FLAGS / CMAKE_CXX_FLAGS did, without rewriting the global
  # flag strings.
  add_definitions(-DHAS_EGL)
endif()

add_library(utests SHARED ${utests_sources})

# Keyword-less signature kept on purpose: the minimum CMake version of the
# project is not visible from this file.
target_link_libraries(utests cl m ${OPENGL_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})

add_executable(utest_run utest_run.cpp)
target_link_libraries(utest_run utests)
# Build-order edge only: makes sure the kernel binary exists before the
# runner is built.
add_dependencies(utest_run kernel_bin.bin)

add_executable(flat_address_space runtime_flat_address_space.cpp)
target_link_libraries(flat_address_space utests)
"utest_helper.hpp" static void buildin_work_dim(void) { // Setup kernel and buffers int result, err; OCL_CREATE_KERNEL("buildin_work_dim"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = 1; globals[1] = 1; globals[2] = 1; locals[0] = 1; locals[1] = 1; locals[2] = 1; for( int i=1; i <= 3; i++ ) { // Run the kernel OCL_NDRANGE(i); err = clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(int), &result, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); exit(1); } OCL_ASSERT( result == i); } } MAKE_UTEST_FROM_FUNCTION(buildin_work_dim); Release_v0.3/utests/builtin_acos_asin.cpp000066400000000000000000000054371223142177000207340ustar00rootroot00000000000000#include "utest_helper.hpp" #include #include #define udebug 0 #define printf_c(...) \ {\ printf("\033[1m\033[40;31m");\ printf( __VA_ARGS__ );\ printf("\033[0m");\ } const float input_data[] = {-30, -1, -0.92, -0.5, -0.09, 0, 0.09, 0.5, 0.92, 1, 30}; const int count_input = sizeof(input_data) / sizeof(input_data[0]); const int max_function = 5; static void cpu_compiler_math(float *dst, const float *src) { const float x = *src; dst[0] = acos(x); dst[1] = acosh(x); dst[2] = asin(x); dst[3] = asinh(x); dst[4] = x; } static void builtin_acos_asin(void) { // Setup kernel and buffers int k, i, index_cur; float gpu_data[max_function * count_input] = {0}, cpu_data[max_function * count_input] = {0}; OCL_CREATE_KERNEL("builtin_acos_asin"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, count_input * max_function * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, count_input * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = count_input; locals[0] = 1; clEnqueueWriteBuffer( queue, buf[1], CL_TRUE, 0, count_input * 
sizeof(float), input_data, 0, NULL, NULL); clEnqueueWriteBuffer( queue, buf[2], CL_TRUE, 0, sizeof(int), &max_function , 0, NULL, NULL); // Run the kernel OCL_NDRANGE( 1 ); clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(float) * max_function * count_input, gpu_data, 0, NULL, NULL); for (k = 0; (uint)k < count_input; k++) { cpu_compiler_math( cpu_data + k * max_function, input_data + k); for (i = 0; i < max_function; i++) { index_cur = k * max_function + i; #if udebug if (isinf(cpu_data[index_cur]) && !isinf(gpu_data[index_cur])){ printf_c("%d/%d: %f -> gpu:%f cpu:%f\n", k, i, input_data[k], gpu_data[index_cur], cpu_data[index_cur]); } else if (isnan(cpu_data[index_cur]) && !isnan(gpu_data[index_cur])){ printf_c("%d/%d: %f -> gpu:%f cpu:%f\n", k, i, input_data[k], gpu_data[index_cur], cpu_data[index_cur]); } else if(fabs(gpu_data[index_cur] - cpu_data[index_cur]) > 1e-3f){ printf_c("%d/%d: %f -> gpu:%f cpu:%f\n", k, i, input_data[k], gpu_data[index_cur], cpu_data[index_cur]); } else printf("%d/%d: %f -> gpu:%f cpu:%f\n", k, i, input_data[k], gpu_data[index_cur], cpu_data[index_cur]); #else if (isinf(cpu_data[index_cur])) OCL_ASSERT(isinf(gpu_data[index_cur])); else if (isnan(cpu_data[index_cur])) OCL_ASSERT(isnan(gpu_data[index_cur])); else { OCL_ASSERT(fabs(gpu_data[index_cur] - cpu_data[index_cur]) < 1e-3f); } #endif } } } MAKE_UTEST_FROM_FUNCTION(builtin_acos_asin) Release_v0.3/utests/builtin_atan2.cpp000066400000000000000000000020731223142177000177730ustar00rootroot00000000000000#include #include "utest_helper.hpp" void builtin_atan2(void) { const int n = 1024; float y[n], x[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_atan2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 
#include <cstring>  // memcpy

/*
 * Returns the object representation of `f` reinterpreted as an int.
 *
 * The bytes are copied with memcpy instead of dereferencing a casted pointer:
 * reading a float object through an int lvalue violates the strict-aliasing
 * rule and may be miscompiled at higher optimisation levels.
 * Assumes sizeof(int) == sizeof(float) -- TODO confirm for all targets.
 */
int as_int(float f)
{
  int bits;
  memcpy(&bits, &f, sizeof(bits));
  return bits;
}

/*
 * Host reference for bitselect: every result bit is taken from `b` where the
 * matching bit of `c` is set and from `a` where it is clear.
 */
int cpu(int a, int b, int c)
{
  return (a & ~c) | (b & c);
}
/*
 * Returns the product of two rand() samples, each shifted down by
 * RAND_MAX/2 so that it can be negative. The product is formed in 64 bits,
 * giving values well outside the range of the narrower integer types.
 */
int64_t my_rand(void)
{
  const int half_range = RAND_MAX / 2;
  const int64_t first = rand() - half_range;
  const int64_t second = rand() - half_range;
  return first * second;
}
32767); DEF(short, int, -32768, 32767); DEF(short, uint, -32768, 32767); DEF2(short, long, -32768, 32767, int64_t); DEF(short, float, -32768, 32767); DEF(ushort, short, 0, 65535); DEF(ushort, int, 0, 65535); DEF(ushort, uint, 0, 65535); DEF2(ushort, long, 0, 65535, int64_t); DEF(ushort, float, 0, 65535); DEF(int, uint, -0x7FFFFFFF-1, 0x7FFFFFFF); DEF2(int, long, -0x7FFFFFFF-1, 0x7FFFFFFF, int64_t); DEF(int, float, -0x7FFFFFFF-1, 0x7FFFFFFF); DEF(uint, int, 0, 0xffffffffu); DEF2(uint, long, 0, 0xffffffffu, int64_t); DEF(uint, float, 0, 0xffffffffu); #undef DEF Release_v0.3/utests/builtin_frexp.cpp000066400000000000000000000026271223142177000201170ustar00rootroot00000000000000#include #include "utest_helper.hpp" void builtin_frexp(void) { const int n = 32; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_frexp"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); src[0] = ((float*)buf_data[0])[0] = 0.f; src[1] = ((float*)buf_data[0])[1] = -0.f; src[2] = ((float*)buf_data[0])[2] = nanf(""); src[3] = ((float*)buf_data[0])[3] = INFINITY; src[4] = ((float*)buf_data[0])[4] = -INFINITY; for (int i = 5; i < n; ++i) src[i] = ((float*)buf_data[0])[i] = (rand() & 255) * 0.1f - 12.8f; OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); float *dst = (float*)buf_data[1]; int *exp = (int*)buf_data[2]; int w; OCL_ASSERT(dst[0] == 0.f && exp[0] == 0); OCL_ASSERT(dst[1] == -0.f && exp[1] == 0); OCL_ASSERT(isnanf(dst[2])); OCL_ASSERT(dst[3] == INFINITY); OCL_ASSERT(dst[4] == -INFINITY); for (int i = 5; i < n; ++i) { OCL_ASSERT(fabsf(dst[i] - frexpf(src[i], &w)) < 1e-5); OCL_ASSERT(exp[i] == w); } OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); } 
MAKE_UTEST_FROM_FUNCTION(builtin_frexp); Release_v0.3/utests/builtin_global_id.cpp000066400000000000000000000032011223142177000206740ustar00rootroot00000000000000/* According to the OpenCL v1.1 & v1.2 chapter 6.11. Now define global size as following: globals[0] = 3; globals[1] = 4; globals[2] = 5; Kernel: id = get_global_id(0) + get_global_id(1)*3 + get_global_id(2)*3*4 dimension:1 0 1 2 dimension:2 0 1 2 3 4 5 6 7 8 9 10 11 dimension:3 0 1 2 12 13 14 24 25 26 36 37 38 48 49 50 3 4 5 15 16 17 27 28 29 39 40 41 51 52 53 6 7 8 18 19 20 30 31 32 42 43 44 54 55 56 9 10 11 21 22 23 33 34 35 45 46 47 57 58 59 */ #define udebug 0 #include "utest_helper.hpp" static void builtin_global_id(void) { // Setup kernel and buffers int dim, global_id[80], err, i, buf_len=1; OCL_CREATE_KERNEL("builtin_global_id"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*80, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); for( dim=1; dim <= 3; dim++ ) { buf_len = 1; for(i=1; i <= dim; i++) { globals[i - 1] = 2 + i; locals[i - 1] = 2 + i; buf_len *= 2 + i; } for(i=dim+1; i <= 3; i++) { globals[i - 1] = 0; locals[i - 1] = 0; } // Run the kernel OCL_NDRANGE( dim ); clFinish(queue); err = clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(int) * buf_len, &global_id, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! 
%d\n", err); exit(1); } #if udebug for(i = 0; i < buf_len; i++) { printf("%2d ", global_id[i]); if ((i + 1) % 3 == 0) printf("\n"); } #endif for( i = 0; i < buf_len; i++) OCL_ASSERT( global_id[i] == i); } } MAKE_UTEST_FROM_FUNCTION(builtin_global_id); Release_v0.3/utests/builtin_global_size.cpp000066400000000000000000000057451223142177000212710ustar00rootroot00000000000000/* According to the OpenCL v1.1 & v1.2 chapter 6.11, the behavior of function get_global_size should be as following: globals[0] = 3; globals[1] = 4; globals[2] = 5; #ifdef CL_VERSION_1_2 | CL_VERSION_1_1: get_global_size(-1) = 1 (dimension:1) get_global_size(0) = 3 (dimension:1) get_global_size(1) = 1 (dimension:1) get_global_size(2) = 1 (dimension:1) get_global_size(-1) = 1 (dimension:2) get_global_size(0) = 3 (dimension:2) get_global_size(1) = 4 (dimension:2) get_global_size(2) = 1 (dimension:2) get_global_size(3) = 1 (dimension:2) get_global_size(-1) = 1 (dimension:3) get_global_size(0) = 3 (dimension:3) get_global_size(1) = 4 (dimension:3) get_global_size(2) = 5 (dimension:3) get_global_size(3) = 1 (dimension:3) get_global_size(4) = 1 (dimension:3) #ifdef CL_VERSION_1_0: get_global_size(-1) = 0 (dimension:1) get_global_size(0) = 3 (dimension:1) get_global_size(1) = 0 (dimension:1) get_global_size(2) = 0 (dimension:1) get_global_size(-1) = 0 (dimension:2) get_global_size(0) = 3 (dimension:2) get_global_size(1) = 4 (dimension:2) get_global_size(2) = 0 (dimension:2) get_global_size(3) = 1 (dimension:2) get_global_size(-1) = 0 (dimension:3) get_global_size(0) = 3 (dimension:3) get_global_size(1) = 4 (dimension:3) get_global_size(2) = 5 (dimension:3) get_global_size(3) = 0 (dimension:3) get_global_size(4) = 0 (dimension:3) */ #include "utest_helper.hpp" static void builtin_global_size(void) { // Setup kernel and buffers int dim, dim_arg_global, global_size, err; OCL_CREATE_KERNEL("builtin_global_size"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 
CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 3; globals[1] = 4; globals[2] = 5; locals[0] = 1; locals[1] = 1; locals[2] = 1; for( dim=1; dim <= 3; dim++ ) { for( dim_arg_global = -1; dim_arg_global <= dim + 1; dim_arg_global++ ) { err = clEnqueueWriteBuffer( queue, buf[1], CL_TRUE, 0, sizeof(int), &dim_arg_global, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write to source array!\n"); exit(1); } // Run the kernel OCL_NDRANGE( dim ); err = clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(int), &global_size, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); exit(1); } //printf("get_global_size(%d) = %d (dimension:%d)\n", dim_arg_global, global_size, dim); if ( dim_arg_global >= 0 && dim_arg_global < dim) OCL_ASSERT( global_size == dim_arg_global + 3); else { #if defined(CL_VERSION_1_2) || defined(CL_VERSION_1_1) OCL_ASSERT( global_size == 1); #elif defined(CL_VERSION_1_0) OCL_ASSERT( global_size == 0); #else OCL_ASSERT( global_size == 1); #endif } } } } MAKE_UTEST_FROM_FUNCTION(builtin_global_size); Release_v0.3/utests/builtin_lgamma.cpp000066400000000000000000000016441223142177000202270ustar00rootroot00000000000000#include #include "utest_helper.hpp" void builtin_lgamma(void) { const int n = 1024; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_lgamma"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int j = 0; j < 1024; j++) { OCL_MAP_BUFFER(0); for (int i = 0; i < n; ++i) { src[i] = ((float*) buf_data[0])[i] = (j * n + i + 1) * 0.001f; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); float *dst = (float*) buf_data[1]; for (int i = 0; i < n; ++i) { float cpu = lgamma(src[i]); float gpu = dst[i]; 
if (fabsf(cpu - gpu) >= 1e-3) { printf("%f %f %f\n", src[i], cpu, gpu); OCL_ASSERT(0); } } OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION (builtin_lgamma); Release_v0.3/utests/builtin_lgamma_r.cpp000066400000000000000000000022331223142177000205430ustar00rootroot00000000000000#include #include "utest_helper.hpp" void builtin_lgamma_r(void) { const int n = 1024; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_lgamma_r"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; for (int j = 0; j < 1024; j++) { OCL_MAP_BUFFER(0); for (int i = 0; i < n; ++i) { src[i] = ((float*) buf_data[0])[i] = (j * n + i + 1) * 0.001f; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); float *dst = (float*) buf_data[1]; for (int i = 0; i < n; ++i) { int cpu_signp; float cpu = lgamma_r(src[i], &cpu_signp); int gpu_signp = ((int*)buf_data[2])[i]; float gpu = dst[i]; if (cpu_signp != gpu_signp || fabsf(cpu - gpu) >= 1e-3) { printf("%f %f %f\n", src[i], cpu, gpu); OCL_ASSERT(0); } } OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); } } MAKE_UTEST_FROM_FUNCTION (builtin_lgamma_r); Release_v0.3/utests/builtin_local_id.cpp000066400000000000000000000034171223142177000205370ustar00rootroot00000000000000/* According to the OpenCL v1.1 & v1.2 chapter 6.11. Now define local and global size as following: globals[0] = 4; globals[1] = 9; globals[2] = 16; locals[0] = 2; locals[1] = 3; locals[2] = 4; Kernel: int id = get_local_id(0) + get_group_id(0)*2 + \ get_local_id(1) * 4 + get_group_id(1)*12 +\ get_local_id(2) *36 + get_group_id(2)*144; dimension:1 0 1 2 3 dimension:2 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 dimension:3 0 1 2 3 4 5 6 7 ... 
139 140 141 142 143 ... ... 429 430 431 432 433 434 ... 571 572 573 574 575 */ #define udebug 0 #include "utest_helper.hpp" static void builtin_local_id(void) { // Setup kernel and buffers int dim, local_id[576], err, i, buf_len=1; OCL_CREATE_KERNEL("builtin_local_id"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); for( dim=1; dim <= 3; dim++ ) { buf_len = 1; for(i=1; i <= dim; i++) { locals[i - 1] = i + 1; globals[i - 1] = (i + 1) * (i + 1); buf_len *= ((i + 1) * (i + 1)); } for(i = dim+1; i <= 3; i++) { globals[i - 1] = 0; locals[i - 1] = 0; } // Run the kernel OCL_NDRANGE( dim ); clFinish(queue); err = clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(int) * buf_len, &local_id, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); exit(1); } #if udebug for(i = 0; i < buf_len; i++) { printf("%2d ", local_id[i]); if ((i + 1) % 4 == 0) printf("\n"); } #endif for( i = 0; i < buf_len; i++) OCL_ASSERT( local_id[i] == i); } } MAKE_UTEST_FROM_FUNCTION(builtin_local_id); Release_v0.3/utests/builtin_local_size.cpp000066400000000000000000000043501223142177000211120ustar00rootroot00000000000000/* According to the OpenCL v1.1 & v1.2 chapter 6.11, the behavior of function get_local_size should be as following: globals[0] = 3; globals[1] = 4; globals[2] = 5; locals[0] = 3; locals[1] = 4; locals[2] = 5; get_local_size(-1) = 1 (dimension:1) get_local_size(0) = 3 (dimension:1) get_local_size(1) = 1 (dimension:1) get_local_size(2) = 1 (dimension:1) get_local_size(-1) = 1 (dimension:2) get_local_size(0) = 3 (dimension:2) get_local_size(1) = 4 (dimension:2) get_local_size(2) = 1 (dimension:2) get_local_size(3) = 1 (dimension:2) get_local_size(-1) = 1 (dimension:3) get_local_size(0) = 3 (dimension:3) get_local_size(1) = 4 (dimension:3) get_local_size(2) = 5 (dimension:3) get_local_size(3) = 1 (dimension:3) get_local_size(4) = 1 (dimension:3) */ #include "utest_helper.hpp" 
#define udebug 0 static void builtin_local_size(void) { // Setup kernel and buffers int dim, dim_arg_global, local_size, err; OCL_CREATE_KERNEL("builtin_local_size"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 3; globals[1] = 4; globals[2] = 5; locals[0] = 3; locals[1] = 4; locals[2] = 5; for( dim=1; dim <= 3; dim++ ) { for( dim_arg_global = -1; dim_arg_global <= dim + 1; dim_arg_global++ ) { err = clEnqueueWriteBuffer( queue, buf[1], CL_TRUE, 0, sizeof(int), &dim_arg_global, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write to source array!\n"); exit(1); } // Run the kernel OCL_NDRANGE( dim ); err = clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(int), &local_size, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); exit(1); } #if udebug printf("get_local_size(%d) = %d (dimension:%d)\n", dim_arg_global, local_size, dim); #endif if ( dim_arg_global >= 0 && dim_arg_global < dim) OCL_ASSERT( local_size == dim_arg_global + 3); else { OCL_ASSERT( local_size == 1); } } } } MAKE_UTEST_FROM_FUNCTION(builtin_local_size); Release_v0.3/utests/builtin_mad_sat.cpp000066400000000000000000000023541223142177000204000ustar00rootroot00000000000000#include "utest_helper.hpp" void builtin_mad_sat(void) { const int n = 32; short src1[n], src2[n], src3[n]; srand(0); // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_mad_sat"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(short), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] 
= 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { src1[i] = ((short*)buf_data[0])[i] = rand(); src2[i] = ((short*)buf_data[1])[i] = rand(); src3[i] = ((short*)buf_data[2])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); OCL_MAP_BUFFER(3); for (int i = 0; i < n; ++i) { int a = (int)src1[i] * (int)src2[i] + (int)src3[i]; a = a > 0x7FFF ? 0x7FFF : (a < -0x8000 ? -0x8000 : a); OCL_ASSERT(((short*)buf_data[3])[i] == (short)a); } OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(builtin_mad_sat); Release_v0.3/utests/builtin_modf.cpp000066400000000000000000000027051223142177000177150ustar00rootroot00000000000000#include #include #include "utest_helper.hpp" void builtin_modf(void) { const int n = 32; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_modf"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; src[0] = INFINITY; src[1] = -INFINITY; src[2] = nanf(""); src[3] = 0; src[4] = 1.5f; src[5] = 2.5f; src[6] = -2.5f; src[7] = 20; src[8] = 21; src[9] = 89.5f; OCL_MAP_BUFFER(0); memcpy(buf_data[0], src, n * sizeof(float)); OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); float *dst = (float *)buf_data[1]; float *it = (float *)buf_data[2]; OCL_ASSERT(dst[0] == 0 && it[0] == INFINITY); OCL_ASSERT(dst[1] == -0.f && it[1] == -INFINITY); OCL_ASSERT(isnanf(dst[2]) && isnanf(it[2])); OCL_ASSERT(dst[3] == 0 && it[3] == 0); OCL_ASSERT(dst[4] == 0.5f && it[4] == 1); OCL_ASSERT(dst[5] == 0.5f && it[5] == 2); OCL_ASSERT(dst[6] == -0.5f && it[6] == -2); OCL_ASSERT(dst[7] == 0 && it[7] == 20); OCL_ASSERT(dst[8] == 0 && it[8] == 21); OCL_ASSERT(dst[9] == 0.5f && it[9] == 89); 
OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(builtin_modf); Release_v0.3/utests/builtin_nextafter.cpp000066400000000000000000000032161223142177000207660ustar00rootroot00000000000000#include #include #include "utest_helper.hpp" static int as_int(float f) { void *p = &f; return *(int *)p; } void builtin_nextafter(void) { const int n = 16; float src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_nextafter"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; src1[0] = nanf(""), src2[0] = 1.1f; src1[1] = 2.2f, src2[1] = nanf(""); src1[2] = nanf(""), src2[2] = nanf(""); src1[3] = 123.4f, src2[3] = 123.4f; src1[4] = 0.f, src2[4] = 1.f; src1[5] = -0.f, src2[5] = -1.f; for (int i = 6; i < n; ++i) { src1[i] = (rand() & 255) * 0.1f - 12.8f; src2[i] = (rand() & 255) * 0.1f - 12.8f; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, n * sizeof(float)); memcpy(buf_data[1], src2, n * sizeof(float)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); float *dest = (float *)buf_data[2]; if (0) for (int i = 0; i < n; ++i) printf("%d %x %x %x %x\n", i, as_int(src1[i]), as_int(src2[i]), as_int(dest[i]), as_int(nextafterf(src1[i], src2[i]))); OCL_ASSERT(isnanf(dest[0])); OCL_ASSERT(isnanf(dest[1])); OCL_ASSERT(isnanf(dest[2])); for (int i = 3; i < n; ++i) OCL_ASSERT(dest[i] == nextafterf(src1[i], src2[i])); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(builtin_nextafter); Release_v0.3/utests/builtin_num_groups.cpp000066400000000000000000000042441223142177000211660ustar00rootroot00000000000000/* According to the OpenCL v1.1 & v1.2 chapter 6.11, the behavior of function get_num_groups should be as following: globals[0] = 1; globals[1] 
= 4; globals[2] = 9; locals[0] = 1; locals[1] = 2; locals[2] = 3; #ifdef CL_VERSION_1_2 | CL_VERSION_1_1: get_num_groups(-1) = 1 (dimension:1) get_num_groups(0) = 1 (dimension:1) get_num_groups(1) = 1 (dimension:1) get_num_groups(-1) = 1 (dimension:2) get_num_groups(0) = 1 (dimension:2) get_num_groups(1) = 2 (dimension:2) get_num_groups(2) = 1 (dimension:2) get_num_groups(-1) = 1 (dimension:3) get_num_groups(0) = 1 (dimension:3) get_num_groups(1) = 2 (dimension:3) get_num_groups(2) = 3 (dimension:3) get_num_groups(3) = 1 (dimension:3) */ #define udebug 0 #include "utest_helper.hpp" static void builtin_num_groups(void) { // Setup kernel and buffers int dim, dim_arg_global, num_groups, err; OCL_CREATE_KERNEL("builtin_num_groups"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 1; globals[1] = 4; globals[2] = 9; locals[0] = 1; locals[1] = 2; locals[2] = 3; for( dim=1; dim <= 3; dim++ ) { for( dim_arg_global = -1; dim_arg_global <= dim + 1; dim_arg_global++ ) { err = clEnqueueWriteBuffer( queue, buf[1], CL_TRUE, 0, sizeof(int), &dim_arg_global, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write to source array!\n"); exit(1); } // Run the kernel OCL_NDRANGE( dim ); err = clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(int), &num_groups, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! 
%d\n", err); exit(1); } #if udebug printf("get_num_groups(%d) = %d (dimension:%d)\n", dim_arg_global, num_groups, dim); #endif if ( dim_arg_global >= 0 && dim_arg_global < dim) OCL_ASSERT( num_groups == dim_arg_global + 1 ); else { OCL_ASSERT( num_groups == 1); } } } } MAKE_UTEST_FROM_FUNCTION(builtin_num_groups); Release_v0.3/utests/builtin_remquo.cpp000066400000000000000000000037241223142177000203020ustar00rootroot00000000000000#include #include #include "utest_helper.hpp" void builtin_remquo(void) { const int n = 16; float src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_remquo"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; src1[0] = 1, src2[0] = 0; src1[1] = 1, src2[1] = -0.f; src1[2] = INFINITY, src2[2] = 1; src1[3] = -INFINITY, src2[3] = 1; src1[4] = nanf(""), src2[4] = nanf(""); src1[5] = 1.625f, src2[5] = 1; src1[6] = -1.625f, src2[6] = 1; src1[7] = 1.625f, src2[7] = -1; src1[8] = -1.625f, src2[8] = -1; src1[9] = 5, src2[9] = 2; src1[10] = 3, src2[10] = 2; src1[11] = -0.f, src2[11] = 1; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, n * sizeof(float)); memcpy(buf_data[1], src2, n * sizeof(float)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); OCL_MAP_BUFFER(3); float *dest = (float *)buf_data[2]; int *quo = (int *)buf_data[3]; OCL_ASSERT(isnanf(dest[0])); OCL_ASSERT(isnanf(dest[1])); OCL_ASSERT(isnanf(dest[2])); OCL_ASSERT(isnanf(dest[3])); OCL_ASSERT(isnanf(dest[4])); OCL_ASSERT(dest[5] == -0.375f && quo[5] == 2); OCL_ASSERT(dest[6] == 0.375f && quo[6] == -2); OCL_ASSERT(dest[7] == -0.375f && quo[7] == -2); 
OCL_ASSERT(dest[8] == 0.375f && quo[8] == 2); OCL_ASSERT(dest[9] == 1 && quo[9] == 2); OCL_ASSERT(dest[10] == -1 && quo[10] == 2); OCL_ASSERT(dest[11] == -0.f && quo[11] == 0); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(builtin_remquo); Release_v0.3/utests/builtin_shuffle.cpp000066400000000000000000000023121223142177000204160ustar00rootroot00000000000000#include "utest_helper.hpp" void builtin_shuffle(void) { const int n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_shuffle"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; i ++) { ((float *)(buf_data[0]))[i] = rand(); ((float *)(buf_data[1]))[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); OCL_MAP_BUFFER(3); for (int i = 0; i < n; i ++) { OCL_ASSERT(((float *)(buf_data[0]))[i] == ((float *)(buf_data[3]))[i]); OCL_ASSERT(((float *)(buf_data[1]))[i] == ((float *)(buf_data[2]))[i]); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(builtin_shuffle); Release_v0.3/utests/builtin_shuffle2.cpp000066400000000000000000000023611223142177000205040ustar00rootroot00000000000000#include "utest_helper.hpp" void builtin_shuffle2(void) { const int n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_shuffle2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * 
sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; i ++) { ((float *)(buf_data[0]))[i] = (rand() & 15) * 0.1f; ((float *)(buf_data[1]))[i] = (rand() & 15) * 0.1f; } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); OCL_MAP_BUFFER(3); for (int i = 0; i < n; i ++) { OCL_ASSERT(2 * ((float *)(buf_data[0]))[i] == ((float *)(buf_data[3]))[i]); OCL_ASSERT(2 * ((float *)(buf_data[1]))[i] == ((float *)(buf_data[2]))[i]); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(builtin_shuffle2); Release_v0.3/utests/builtin_sign.cpp000066400000000000000000000023011223142177000177200ustar00rootroot00000000000000#include #include "utest_helper.hpp" void builtin_sign(void) { const int n = 32; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_sign"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); src[0] = ((float*)buf_data[0])[0] = nanf(""); src[1] = ((float*)buf_data[0])[1] = INFINITY; src[2] = ((float*)buf_data[0])[2] = 0.f; src[3] = ((float*)buf_data[0])[3] = -0.f; for (int i = 4; i < n; ++i) { src[i] = ((float*)buf_data[0])[i] = (rand() & 15) * 0.1 - 0.75; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; OCL_ASSERT(dst[0] == 0); OCL_ASSERT(dst[1] == 1.f); OCL_ASSERT(dst[2] == 0.f); OCL_ASSERT(dst[3] == -0.f); for (int i = 4; i < n; ++i) { if (src[i] == 0.f) OCL_ASSERT(dst[i] == 0.f); else if (src[i] == -0.f) OCL_ASSERT(dst[i] == -0.f); else OCL_ASSERT(dst[i] == (src[i] > 0 ? 
/* Returns the bit pattern of x as an int (type punning through a union). */
static int as_int(float x)
{
  union {float f; int i;} u;
  u.f = x;
  return u.i;
}

/*
 * Host reference for sinpi(x) = sin(pi * x).
 *
 * Small arguments (|x| < 0.25) are evaluated directly. Otherwise the
 * negated argument is reduced to an octant index n (0..7) plus a remainder
 * y, the matching sinf/cosf expression is evaluated and the sign is flipped
 * back on return.
 */
static float sinpi(float x)
{
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
  float y, z;
  int n = 0, ix;
  const float pi = 3.1415927410e+00f;

  ix = as_int(x) & 0x7fffffff;   // bit pattern of |x|
  if (ix < 0x3e800000)           // |x| < 0.25: no reduction needed
    return sinf(pi * x);

  y = -x;
  z = floorf(y);
  if (z != y) {
    // Non-integral argument: reduce y modulo 2 and pick the octant.
    y *= 0.5f;
    y = 2.f * (y - floorf(y));
    n = y * 4.f;
  } else {
    // Integral argument.
    if (ix >= 0x4b800000) {      // |x| >= 2^24: always an even integer
      y = 0;
      n = 0;
    } else {
      if (ix < 0x4b000000)       // |x| < 2^23: move the parity bit into the lowest mantissa bit
        z = y + 8.3886080000e+06f;
      // Assign the function-level n. A fresh declaration here would shadow
      // it, leaving the switch below on n == 0 so that odd integers get
      // sinf(pi) (about -8.7e-8) instead of an exact zero.
      n = as_int(z);
      n &= 1;
      y = n;
      n <<= 2;
    }
  }

  switch (n) {
  case 0:
    y = sinf(pi * y);
    break;
  case 1:
  case 2:
    y = cosf(pi * ((float) 0.5 - y));
    break;
  case 3:
  case 4:
    y = sinf(pi * (1.f - y));
    break;
  case 5:
  case 6:
    y = -cosf(pi * (y - (float) 1.5));
    break;
  default:
    y = sinf(pi * (y - (float) 2.0));
    break;
  }
  return -y;
}
OCL_ASSERT (fabsf(cpu - dst[i]) < 1e-4); } OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(builtin_sinpi); Release_v0.3/utests/builtin_tgamma.cpp000066400000000000000000000020011223142177000202230ustar00rootroot00000000000000#include #include "utest_helper.hpp" void builtin_tgamma(void) { const int n = 1024; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_tgamma"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int j = 0; j < 1024; j ++) { OCL_MAP_BUFFER(0); for (int i = 0; i < n; ++i) { src[i] = ((float*)buf_data[0])[i] = (j*n+i+1) * 0.001f; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; for (int i = 0; i < n; ++i) { float cpu = gammaf(src[i]); if (isinf(cpu)) { OCL_ASSERT(isinf(dst[i])); } else if (fabsf(cpu - dst[i]) >= 1e-3) { printf("%f %f %f\n", src[i], cpu, dst[i]); OCL_ASSERT(0); } } OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(builtin_tgamma); Release_v0.3/utests/cl_create_kernel.cpp000066400000000000000000000010021223142177000205100ustar00rootroot00000000000000#include "utest_helper.hpp" static void test_create_kernel(void) { cl_ulong max_mem_size; cl_int status; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_size), &max_mem_size, NULL); OCL_ASSERT(max_mem_size < (cl_ulong)-1); // increment the size so that following clCreateBuffer() would fail. ++max_mem_size; buf[0] = clCreateBuffer(ctx, 0, max_mem_size, NULL, &status); OCL_ASSERT(status == CL_INVALID_BUFFER_SIZE); } MAKE_UTEST_FROM_FUNCTION(test_create_kernel); Release_v0.3/utests/compiler_abs.cpp000066400000000000000000000150041223142177000176750ustar00rootroot00000000000000#include "utest_helper.hpp" #include "string.h" template struct cl_vec { T ptr[((N+1)/2)*2]; //align to 2 elements. 
typedef cl_vec vec_type; cl_vec(void) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); } cl_vec(vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); } vec_type& operator= (vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } template vec_type& operator= (cl_vec & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } bool operator== (vec_type & other) { return !memcmp (this->ptr, other.ptr, sizeof(T) * N); } void abs(void) { int i = 0; for (; i < N; i++) { T f = ptr[i]; f = f < 0 ? -f : f; ptr[i] = f; } } }; template static void cpu (int global_id, cl_vec *src, cl_vec *dst) { cl_vec v = src[global_id]; v.abs(); dst[global_id] = v; } template static void cpu(int global_id, T *src, U *dst) { T f = src[global_id]; f = f < 0 ? -f : f; dst[global_id] = (U)f; } template static void gen_rand_val (cl_vec& vect) { int i = 0; memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2); for (; i < N; i++) { vect.ptr[i] = static_cast((rand() & 63) - 32); } } template static void gen_rand_val (T & val) { val = static_cast((rand() & 63) - 32); } template inline static void print_data (T& val) { if (std::is_unsigned::value) printf(" %u", val); else printf(" %d", val); } template static void dump_data (cl_vec* src, cl_vec* dst, int n) { U* val = reinterpret_cast(dst); n = n*((N+1)/2)*2; printf("\nRaw: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(val[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void dump_data (T* src, U* dst, int n) { printf("\nRaw: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(dst[i]); } 
printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void compiler_abs_with_type(void) { const size_t n = 16; U cpu_dst[16]; T cpu_src[16]; // Setup buffers OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); /* Clear the dst buffer to avoid random data. */ OCL_MAP_BUFFER(1); memset(buf_data[1], 0, sizeof(U) * n); OCL_UNMAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(cpu_src[i]); } memcpy(buf_data[0], cpu_src, sizeof(T) * n); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); // dump_data(cpu_src, cpu_dst, n); OCL_ASSERT(!memcmp(buf_data[1], cpu_dst, sizeof(T) * n)); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(0); } } #define ABS_TEST_TYPE(TYPE, UTYPE) \ static void compiler_abs_##TYPE (void) \ { \ OCL_CALL (cl_kernel_init, "compiler_abs.cl", "compiler_abs_"#TYPE, SOURCE, NULL); \ compiler_abs_with_type(); \ } \ MAKE_UTEST_FROM_FUNCTION(compiler_abs_##TYPE); typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; ABS_TEST_TYPE(int, uint) ABS_TEST_TYPE(short, ushort) ABS_TEST_TYPE(char, uchar) ABS_TEST_TYPE(uint, uint) ABS_TEST_TYPE(ushort, ushort) ABS_TEST_TYPE(uchar, uchar) typedef cl_vec int2; typedef cl_vec int3; typedef cl_vec int4; typedef cl_vec int8; typedef cl_vec int16; typedef cl_vec uint2; typedef cl_vec uint3; typedef cl_vec uint4; typedef cl_vec uint8; typedef cl_vec uint16; ABS_TEST_TYPE(int2, uint2) ABS_TEST_TYPE(int3, uint3) ABS_TEST_TYPE(int4, uint4) ABS_TEST_TYPE(int8, uint8) ABS_TEST_TYPE(int16, uint16) ABS_TEST_TYPE(uint2, uint2) ABS_TEST_TYPE(uint3, uint3) ABS_TEST_TYPE(uint4, uint4) 
ABS_TEST_TYPE(uint8, uint8) ABS_TEST_TYPE(uint16, uint16) typedef cl_vec char2; typedef cl_vec char3; typedef cl_vec char4; typedef cl_vec char8; typedef cl_vec char16; typedef cl_vec uchar2; typedef cl_vec uchar3; typedef cl_vec uchar4; typedef cl_vec uchar8; typedef cl_vec uchar16; ABS_TEST_TYPE(char2, uchar2) ABS_TEST_TYPE(char3, uchar3) ABS_TEST_TYPE(char4, uchar4) ABS_TEST_TYPE(char8, uchar8) ABS_TEST_TYPE(char16, uchar16) ABS_TEST_TYPE(uchar2, uchar2) ABS_TEST_TYPE(uchar3, uchar3) ABS_TEST_TYPE(uchar4, uchar4) ABS_TEST_TYPE(uchar8, uchar8) ABS_TEST_TYPE(uchar16, uchar16) typedef cl_vec short2; typedef cl_vec short3; typedef cl_vec short4; typedef cl_vec short8; typedef cl_vec short16; typedef cl_vec ushort2; typedef cl_vec ushort3; typedef cl_vec ushort4; typedef cl_vec ushort8; typedef cl_vec ushort16; ABS_TEST_TYPE(short2, ushort2) ABS_TEST_TYPE(short3, ushort3) ABS_TEST_TYPE(short4, ushort4) ABS_TEST_TYPE(short8, ushort8) ABS_TEST_TYPE(short16, ushort16) ABS_TEST_TYPE(ushort2, ushort2) ABS_TEST_TYPE(ushort3, ushort3) ABS_TEST_TYPE(ushort4, ushort4) ABS_TEST_TYPE(ushort8, ushort8) ABS_TEST_TYPE(ushort16, ushort16) Release_v0.3/utests/compiler_abs_diff.cpp000066400000000000000000000203651223142177000206730ustar00rootroot00000000000000#include "utest_helper.hpp" #include "string.h" template struct cl_vec { T ptr[((N+1)/2)*2]; //align to 2 elements. 
typedef cl_vec vec_type; cl_vec(void) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); } cl_vec(vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); } vec_type& operator= (vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } template vec_type& operator= (cl_vec & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } bool operator== (vec_type & other) { return !memcmp (this->ptr, other.ptr, sizeof(T) * N); } void abs_diff(vec_type & other) { int i = 0; for (; i < N; i++) { T a = ptr[i]; T b = other.ptr[i]; T f = a > b ? (a - b) : (b - a); ptr[i] = f; } } }; template static void cpu (int global_id, cl_vec *x, cl_vec *y, cl_vec *diff) { cl_vec v = x[global_id]; v.abs_diff(y[global_id]); diff[global_id] = v; } template static void cpu(int global_id, T *x, T *y, U *diff) { T a = x[global_id]; T b = y[global_id]; U f = a > b ? 
(a - b) : (b - a); diff[global_id] = f; } template static void gen_rand_val (cl_vec& vect) { int i = 0; for (; i < N; i++) { vect.ptr[i] = static_cast((rand() & 63) - 32); } } template static void gen_rand_val (T & val) { val = static_cast((rand() & 63) - 32); } template inline static void print_data (T& val) { if (std::is_unsigned::value) printf(" %u", val); else printf(" %d", val); } template static void dump_data (cl_vec* x, cl_vec* y, cl_vec* diff, int n) { U* val = reinterpret_cast(diff); n = n*((N+1)/2)*2; printf("\nRaw x: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nRaw y: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[1])[i]); } printf("\nCPU diff: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(val[i]); } printf("\nGPU diff: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[2])[i]); } } template static void dump_data (T* x, T* y, U* diff, int n) { printf("\nRaw x: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nRaw y: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[1])[i]); } printf("\nCPU diff: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(diff[i]); } printf("\nGPU diff: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[2])[i]); } } template static void compiler_abs_diff_with_type(void) { const size_t n = 16; U cpu_diff[16]; T cpu_x[16]; T cpu_y[16]; // Setup buffers OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(U), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); /* Clear the dst buffer to avoid random data. 
*/ OCL_MAP_BUFFER(2); memset(buf_data[2], 0, sizeof(U) * n); OCL_UNMAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(cpu_x[i]); gen_rand_val(cpu_y[i]); } memcpy(buf_data[0], cpu_x, sizeof(T) * n); memcpy(buf_data[1], cpu_y, sizeof(T) * n); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_x, cpu_y, cpu_diff); // Compare OCL_MAP_BUFFER(2); // dump_data(cpu_x, cpu_y, cpu_diff, n); OCL_ASSERT(!memcmp(buf_data[2], cpu_diff, sizeof(T) * n)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); } } #define ABS_TEST_DIFF_TYPE_2(TYPE, CLTYPE, UTYPE) \ static void compiler_abs_diff_##CLTYPE (void) \ { \ OCL_CALL (cl_kernel_init, "compiler_abs_diff.cl", "compiler_abs_diff_"#CLTYPE, SOURCE, NULL); \ compiler_abs_diff_with_type(); \ } \ MAKE_UTEST_FROM_FUNCTION(compiler_abs_diff_##CLTYPE); #define ABS_TEST_DIFF_TYPE(TYPE, UTYPE) ABS_TEST_DIFF_TYPE_2(TYPE, TYPE, UTYPE) typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; typedef uint64_t ulong64; ABS_TEST_DIFF_TYPE(int, uint) ABS_TEST_DIFF_TYPE_2(int64_t, long, ulong64) ABS_TEST_DIFF_TYPE(short, ushort) ABS_TEST_DIFF_TYPE(char, uchar) ABS_TEST_DIFF_TYPE(uint, uint) ABS_TEST_DIFF_TYPE_2(ulong64, ulong, ulong64) ABS_TEST_DIFF_TYPE(ushort, ushort) ABS_TEST_DIFF_TYPE(uchar, uchar) typedef cl_vec int2; typedef cl_vec int3; typedef cl_vec int4; typedef cl_vec int8; typedef cl_vec int16; typedef cl_vec uint2; typedef cl_vec uint3; typedef cl_vec uint4; typedef cl_vec uint8; typedef cl_vec uint16; ABS_TEST_DIFF_TYPE(int2, uint2) ABS_TEST_DIFF_TYPE(int3, uint3) ABS_TEST_DIFF_TYPE(int4, uint4) ABS_TEST_DIFF_TYPE(int8, uint8) ABS_TEST_DIFF_TYPE(int16, uint16) ABS_TEST_DIFF_TYPE(uint2, uint2) ABS_TEST_DIFF_TYPE(uint3, uint3) ABS_TEST_DIFF_TYPE(uint4, uint4) ABS_TEST_DIFF_TYPE(uint8, uint8) ABS_TEST_DIFF_TYPE(uint16, uint16) typedef cl_vec long2; typedef cl_vec long3; typedef cl_vec long4; typedef cl_vec long8; typedef 
cl_vec long16; typedef cl_vec ulong2; typedef cl_vec ulong3; typedef cl_vec ulong4; typedef cl_vec ulong8; typedef cl_vec ulong16; ABS_TEST_DIFF_TYPE(long2, ulong2) ABS_TEST_DIFF_TYPE(long3, ulong3) ABS_TEST_DIFF_TYPE(long4, ulong4) ABS_TEST_DIFF_TYPE(long8, ulong8) ABS_TEST_DIFF_TYPE(long16, ulong16) ABS_TEST_DIFF_TYPE(ulong2, ulong2) ABS_TEST_DIFF_TYPE(ulong3, ulong3) ABS_TEST_DIFF_TYPE(ulong4, ulong4) ABS_TEST_DIFF_TYPE(ulong8, ulong8) ABS_TEST_DIFF_TYPE(ulong16, ulong16) typedef cl_vec char2; typedef cl_vec char3; typedef cl_vec char4; typedef cl_vec char8; typedef cl_vec char16; typedef cl_vec uchar2; typedef cl_vec uchar3; typedef cl_vec uchar4; typedef cl_vec uchar8; typedef cl_vec uchar16; ABS_TEST_DIFF_TYPE(char2, uchar2) ABS_TEST_DIFF_TYPE(char3, uchar3) ABS_TEST_DIFF_TYPE(char4, uchar4) ABS_TEST_DIFF_TYPE(char8, uchar8) ABS_TEST_DIFF_TYPE(char16, uchar16) ABS_TEST_DIFF_TYPE(uchar2, uchar2) ABS_TEST_DIFF_TYPE(uchar3, uchar3) ABS_TEST_DIFF_TYPE(uchar4, uchar4) ABS_TEST_DIFF_TYPE(uchar8, uchar8) ABS_TEST_DIFF_TYPE(uchar16, uchar16) typedef cl_vec short2; typedef cl_vec short3; typedef cl_vec short4; typedef cl_vec short8; typedef cl_vec short16; typedef cl_vec ushort2; typedef cl_vec ushort3; typedef cl_vec ushort4; typedef cl_vec ushort8; typedef cl_vec ushort16; ABS_TEST_DIFF_TYPE(short2, ushort2) ABS_TEST_DIFF_TYPE(short3, ushort3) ABS_TEST_DIFF_TYPE(short4, ushort4) ABS_TEST_DIFF_TYPE(short8, ushort8) ABS_TEST_DIFF_TYPE(short16, ushort16) ABS_TEST_DIFF_TYPE(ushort2, ushort2) ABS_TEST_DIFF_TYPE(ushort3, ushort3) ABS_TEST_DIFF_TYPE(ushort4, ushort4) ABS_TEST_DIFF_TYPE(ushort8, ushort8) ABS_TEST_DIFF_TYPE(ushort16, ushort16) Release_v0.3/utests/compiler_address_space.cpp000066400000000000000000000002471223142177000217330ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_address_space(void) { OCL_CREATE_KERNEL("compiler_address_space"); } MAKE_UTEST_FROM_FUNCTION(compiler_address_space); 
Release_v0.3/utests/compiler_argument_structure.cpp000066400000000000000000000011551223142177000230740ustar00rootroot00000000000000#include "utest_helper.hpp" struct hop { int x, y; }; void compiler_argument_structure(void) { const size_t n = 2048; hop h = {3, 4}; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_argument_structure"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(hop), &h); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 7); } MAKE_UTEST_FROM_FUNCTION(compiler_argument_structure); Release_v0.3/utests/compiler_argument_structure_indirect.cpp000066400000000000000000000012531223142177000247540ustar00rootroot00000000000000#include "utest_helper.hpp" struct hop { int x[16]; }; void compiler_argument_structure_indirect(void) { const size_t n = 2048; hop h; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_argument_structure_indirect"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); for (int i = 0; i < 16; ++i) h.x[i] = i; OCL_SET_ARG(1, sizeof(hop), &h); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 7); } MAKE_UTEST_FROM_FUNCTION(compiler_argument_structure_indirect); Release_v0.3/utests/compiler_arith_shift_right.cpp000066400000000000000000000021631223142177000226330ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { dst[global_id] = src[global_id] >> 24; } void compiler_arith_shift_right(void) { const size_t n = 16; int cpu_src[16]; int cpu_dst[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_arith_shift_right"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 
0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int*)buf_data[0])[i] = 0x80000000 | rand(); OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_arith_shift_right); Release_v0.3/utests/compiler_array.cpp000066400000000000000000000013221223142177000202440ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_array(void) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_array"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); // First control flow OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 3); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_array); Release_v0.3/utests/compiler_array0.cpp000066400000000000000000000024501223142177000203270ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { int i; int final[16]; for (i = 0; i < 16; ++i) { int array[16], j; for (j = 0; j < 16; ++j) array[j] = global_id; for (j = 0; j < src[0]; ++j) array[j] = 1+src[j]; final[i] = array[i]; } dst[global_id] = final[global_id]; } void compiler_array0(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and 
buffers OCL_CREATE_KERNEL("compiler_array0"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_array0); Release_v0.3/utests/compiler_array1.cpp000066400000000000000000000024541223142177000203340ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { int final[16]; for (int i = 0; i < 16; ++i) { int array[16]; for (int j = 0; j < src[0]; ++j) array[j] = 1+src[0]; for (int j = src[0]; j < 16; ++j) array[j] = global_id; final[i] = array[i]; } dst[global_id] = final[global_id]; } void compiler_array1(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_array1"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) 
OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_array1); Release_v0.3/utests/compiler_array2.cpp000066400000000000000000000024221223142177000203300ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { int final[16]; int array[16]; for (int j = 0; j < 16; ++j) array[j] = j; for (int j = 0; j < 16; ++j) final[j] = j+1; if (global_id == 15) dst[global_id] = final[global_id]; else dst[global_id] = array[15 - global_id]; } void compiler_array2(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_array2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_array2); Release_v0.3/utests/compiler_array3.cpp000066400000000000000000000024161223142177000203340ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { int tmp[32]; for (int i = 0; i < 16; ++i) { for (int j = 0; j < 16; ++j) tmp[j] = global_id; for (int j = 0; j < src[0]; ++j) tmp[j] = 1+src[j]; tmp[16+i] = tmp[i]; } dst[global_id] = tmp[16+global_id]; } void compiler_array3(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_array3"); 
OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_array3); Release_v0.3/utests/compiler_async_copy.cpp000066400000000000000000000032401223142177000212760ustar00rootroot00000000000000#include "utest_helper.hpp" #include typedef unsigned char uchar; typedef unsigned short ushort; #define DEF(TYPE, KER_TYPE, VEC_SIZE) \ static void compiler_async_copy_##KER_TYPE##VEC_SIZE(void) \ { \ const size_t n = 1024; \ const size_t local_size = 32; \ const int copiesPerWorkItem = 5; \ \ /* Setup kernel and buffers */\ OCL_CREATE_KERNEL_FROM_FILE("compiler_async_copy", "compiler_async_copy_" # KER_TYPE # VEC_SIZE); \ OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(TYPE) * VEC_SIZE, NULL); \ OCL_CREATE_BUFFER(buf[1], 0, n * copiesPerWorkItem * sizeof(TYPE) * VEC_SIZE, NULL); \ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ OCL_SET_ARG(2, local_size*copiesPerWorkItem*sizeof(TYPE)*VEC_SIZE, NULL); \ OCL_SET_ARG(3, sizeof(int), &copiesPerWorkItem); \ \ OCL_MAP_BUFFER(1); \ for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; ++i) \ ((TYPE*)buf_data[1])[i] = rand(); \ OCL_UNMAP_BUFFER(1); \ \ /* Run the kernel */\ globals[0] = n; \ locals[0] = local_size; \ OCL_NDRANGE(1); \ OCL_MAP_BUFFER(0); \ OCL_MAP_BUFFER(1); \ \ /* Check results */\ TYPE *dst = 
(TYPE*)buf_data[0]; \ TYPE *src = (TYPE*)buf_data[1]; \ for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; i++) \ OCL_ASSERT(dst[i] == src[i]); \ OCL_UNMAP_BUFFER(0); \ OCL_UNMAP_BUFFER(1); \ } \ \ MAKE_UTEST_FROM_FUNCTION(compiler_async_copy_##KER_TYPE##VEC_SIZE); DEF(char, char, 2); DEF(uchar, uchar, 2); DEF(short, short, 2); DEF(ushort, ushort, 2); DEF(int, int, 2); DEF(uint, uint, 2); DEF(int64_t, long, 2); DEF(uint64_t, ulong, 2); DEF(float, float, 2); DEF(double, double, 2); Release_v0.3/utests/compiler_async_copy_and_prefetch.cpp000066400000000000000000000003051223142177000237770ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_async_copy_and_prefetch(void) { OCL_CREATE_KERNEL("compiler_async_copy_and_prefetch"); } MAKE_UTEST_FROM_FUNCTION(compiler_async_copy_and_prefetch); Release_v0.3/utests/compiler_async_stride_copy.cpp000066400000000000000000000026301223142177000226520ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_async_stride_copy(void) { const size_t n = 1024; const size_t local_size = 128; const int copiesPerWorkItem = 5; const int stride =3; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_async_stride_copy"); OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(char) * 4 * stride, NULL); OCL_CREATE_BUFFER(buf[1], 0, n * copiesPerWorkItem * sizeof(char) * 4 * stride, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, local_size*copiesPerWorkItem*sizeof(char)*4, NULL); OCL_SET_ARG(3, sizeof(int), &copiesPerWorkItem); OCL_SET_ARG(4, sizeof(int), &stride); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n * copiesPerWorkItem * 4 * stride; ++i) ((char*)buf_data[1])[i] = rand() && 0xff; OCL_UNMAP_BUFFER(1); // Run the kernel globals[0] = n; locals[0] = local_size; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); // Check results char *dst = (char*)buf_data[0]; char *src = (char*)buf_data[1]; for (uint32_t i = 0; i < n * 
copiesPerWorkItem; i += stride * 4) { OCL_ASSERT(dst[i + 0] == src[i + 0] + 3); OCL_ASSERT(dst[i + 1] == src[i + 1] + 3); OCL_ASSERT(dst[i + 2] == src[i + 2] + 3); OCL_ASSERT(dst[i + 3] == src[i + 3] + 3); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_async_stride_copy); Release_v0.3/utests/compiler_atomic_functions.cpp000066400000000000000000000057071223142177000225050ustar00rootroot00000000000000#include "utest_helper.hpp" #include #include #include #define GROUP_NUM 16 #define LOCAL_SIZE 256 static void cpu_compiler_atomic(int *dst, int *src) { dst[4] = 0xffffffff; int tmp[16] = { 0 }; tmp[4] = -1; for(int j=0; j>4)); break; case 5: tmp[i] |= src[j]<<(j>>4); break; case 6: tmp[i] ^= src[j]; break; case 7: tmp[i] = tmp[i] < -src[j] ? tmp[i] : -src[j]; break; case 8: tmp[i] = tmp[i] > src[j] ? tmp[i] : src[j]; break; case 9: tmp[i] = (unsigned int)tmp[i] < (unsigned int)(-src[j]) ? tmp[i] : -src[j]; break; case 10: tmp[i] = (unsigned int)tmp[i] > (unsigned int)(src[j]) ? tmp[i] : src[j]; break; case 11: tmp[i] = src[10]; break; default: break; } } for(int k=0; k>4)); break; case 5: dst[i] |= src[j]<<(j>>4); break; case 6: dst[i] ^= src[j]; break; case 7: dst[i] = dst[i] < -src[j] ? dst[i] : -src[j]; break; case 8: dst[i] = dst[i] > src[j] ? dst[i] : src[j]; break; case 9: dst[i] = (unsigned int)dst[i] < (unsigned int)(-src[j]) ? dst[i] : -src[j]; break; case 10: dst[i] = (unsigned int)dst[i] > (unsigned int)(src[j]) ? 
dst[i] : src[j]; break; case 11: dst[i] = src[10]; break; default: break; } } } for(int i=0; i<12; i++) dst[i+12] = tmp[i]; } static void compiler_atomic_functions(void) { const size_t n = GROUP_NUM * LOCAL_SIZE; int cpu_dst[24] = {0}, cpu_src[256]; globals[0] = n; locals[0] = LOCAL_SIZE; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_atomic_functions"); OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, 16 * sizeof(int), NULL); OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); OCL_MAP_BUFFER(0); memset(buf_data[0], 0, 24 * sizeof(int)); ((int *)buf_data[0])[4] = -1; OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < locals[0]; ++i) cpu_src[i] = ((int*)buf_data[1])[i] = rand() & 0xff; cpu_compiler_atomic(cpu_dst, cpu_src); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for(int i=0; i<24; i++) { //printf("The dst(%d) gpu(0x%x) cpu(0x%x)\n", i, ((uint32_t *)buf_data[0])[i], cpu_dst[i]); OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]); } OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions) Release_v0.3/utests/compiler_basic_arithmetic.cpp000066400000000000000000000066201223142177000224260ustar00rootroot00000000000000#include "utest_helper.hpp" enum eTestOP { TEST_OP_ADD =0, TEST_OP_SUB, TEST_OP_MUL, TEST_OP_DIV, TEST_OP_REM }; template static void test_exec(const char* kernel_name) { const size_t n = 160; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_basic_arithmetic", kernel_name); std::cout <<"kernel name: " << kernel_name << std::endl; buf_data[0] = (T*) malloc(sizeof(T) * n); buf_data[1] = (T*) malloc(sizeof(T) * n); for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[0])[i] = (T) rand(); for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[1])[i] = (T) rand(); if(op == TEST_OP_DIV || op == TEST_OP_REM) { for (uint32_t i = 0; i < n; ++i) { if(((T*)buf_data[1])[i] == 0) 
((T*)buf_data[1])[i] = (T) 1; } } OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[0]); OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[1]); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(2); if(op == TEST_OP_SUB) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] - ((T*)buf_data[1])[i])); } else if(op == TEST_OP_ADD) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] + ((T*)buf_data[1])[i])); } else if(op == TEST_OP_MUL) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] * ((T*)buf_data[1])[i])); } else if(op == TEST_OP_DIV) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] / ((T*)buf_data[1])[i])); } else { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] % ((T*)buf_data[1])[i])); } free(buf_data[0]); free(buf_data[1]); buf_data[0] = buf_data[1] = NULL; } #define DECL_TEST_SUB(type, alias) \ static void compiler_sub_ ##alias(void)\ {\ test_exec("compiler_sub_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_sub_ ## alias) #define DECL_TEST_ADD(type, alias) \ static void compiler_add_ ##alias(void)\ {\ test_exec("compiler_add_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_add_ ## alias) #define DECL_TEST_MUL(type, alias) \ static void compiler_mul_ ##alias(void)\ {\ test_exec("compiler_mul_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_mul_ ## alias) #define DECL_TEST_DIV(type, alias) \ static void compiler_div_ ##alias(void)\ {\ test_exec("compiler_div_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_div_ ## alias) #define DECL_TEST_REM(type, alias) \ static void 
compiler_rem_ ##alias(void)\ {\ test_exec("compiler_rem_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_rem_ ## alias) #define DECL_TEST_FOR_ALL_TYPE(op)\ DECL_TEST_##op(int8_t, char) \ DECL_TEST_##op(uint8_t, uchar) \ DECL_TEST_##op(int16_t, short) \ DECL_TEST_##op(uint16_t, ushort) \ DECL_TEST_##op(int32_t, int) \ DECL_TEST_##op(uint32_t, uint) DECL_TEST_FOR_ALL_TYPE(SUB) DECL_TEST_FOR_ALL_TYPE(ADD) DECL_TEST_FOR_ALL_TYPE(MUL) DECL_TEST_FOR_ALL_TYPE(DIV) DECL_TEST_FOR_ALL_TYPE(REM) #undef DECL_TEST_FOR_ALL_TYPE Release_v0.3/utests/compiler_bool_cross_basic_block.cpp000066400000000000000000000025401223142177000236100ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst, int scale) { bool isRedRow = false; bool isRed; int val = src[global_id]; for (int i=0; i static int w = 0; static int h = 0; static int sz = 0; static const size_t chunk = 64; static int *src = NULL, *dst = NULL; static void compiler_box_blur() { OCL_CREATE_KERNEL("compiler_box_blur"); /* Load the picture */ src = cl_read_bmp("lenna128x128.bmp", &w, &h); sz = w * h * sizeof(int); /* Run the kernel */ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, sz, src); OCL_CREATE_BUFFER(buf[1], 0, sz, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(int), &w); OCL_SET_ARG(3, sizeof(int), &h); OCL_SET_ARG(4, sizeof(int), &chunk); globals[0] = size_t(w/4); globals[1] = h/chunk + ((h%chunk)?1:0); locals[0] = 16; locals[1] = 1; free(src); OCL_NDRANGE(2); OCL_MAP_BUFFER(1); dst = (int*) buf_data[1]; /* Save the image (for debug purpose) */ cl_write_bmp(dst, w, h, "compiler_box_blur.bmp"); /* Compare with the golden image */ OCL_CHECK_IMAGE(dst, w, h, "compiler_box_blur_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_box_blur); Release_v0.3/utests/compiler_box_blur_float.cpp000066400000000000000000000033431223142177000221340ustar00rootroot00000000000000#include "utest_helper.hpp" #include static int 
*tmp = NULL; static struct float4 {float x,y,z,w;} *src = NULL, *dst = NULL; static int w = 0; static int h = 0; static int sz = 0; static const size_t chunk = 64; static void compiler_box_blur_float() { OCL_CREATE_KERNEL("compiler_box_blur_float"); /* Load the picture */ tmp = cl_read_bmp("lenna128x128.bmp", &w, &h); sz = w * h * sizeof(float[4]); src = (float4*)malloc(sz); /* RGBA -> float4 conversion */ const int n = w*h; for (int i = 0; i < n; ++i) { src[i].x = (float) (tmp[i] & 0xff); src[i].y = (float) ((tmp[i] >> 8) & 0xff); src[i].z = (float) ((tmp[i] >> 16) & 0xff); src[i].w = 0.f; } free(tmp); /* Run the kernel */ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, sz, src); OCL_CREATE_BUFFER(buf[1], 0, sz, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(int), &w); OCL_SET_ARG(3, sizeof(int), &h); OCL_SET_ARG(4, sizeof(int), &chunk); globals[0] = size_t(w); globals[1] = h/chunk + ((h%chunk)?1:0); locals[0] = 16; locals[1] = 1; free(src); OCL_NDRANGE(2); OCL_MAP_BUFFER(1); dst = (float4*) buf_data[1]; /* Convert back to RGBA and save */ int *tmp = (int*) malloc(n*sizeof(int)); for (int i = 0; i < n; ++i) { int to = int(std::min(dst[i].x, 255.f)); to |= int(std::min(dst[i].y, 255.f)) << 8; to |= int(std::min(dst[i].z, 255.f)) << 16; tmp[i] = to; } /* Save the image (for debug purpose) */ cl_write_bmp(tmp, w, h, "compiler_box_blur_float.bmp"); /* Compare with the golden image */ OCL_CHECK_IMAGE(tmp, w, h, "compiler_box_blur_float_ref.bmp"); free(tmp); } MAKE_UTEST_FROM_FUNCTION(compiler_box_blur_float); Release_v0.3/utests/compiler_box_blur_image.cpp000066400000000000000000000022141223142177000221050ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_box_blur_image() { int w, h; cl_image_format format = { }; size_t origin[3] = { }; size_t region[3]; int *src, *dst; OCL_CREATE_KERNEL("compiler_box_blur_image"); /* Load the picture */ src = cl_read_bmp("lenna128x128.bmp", &w, 
&h); format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNORM_INT8; /* Run the kernel */ OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w*sizeof(uint32_t), src); free(src); OCL_CREATE_IMAGE2D(buf[1], 0, &format, w, h, 0, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); dst = (int*)malloc(w*h*sizeof(uint32_t)); region[0] = w; region[1] = h; region[2] = 1; OCL_READ_IMAGE(buf[1], origin, region, dst); /* Save the image (for debug purpose) */ cl_write_bmp(dst, w, h, "compiler_box_blur_image.bmp"); /* Compare with the golden image */ OCL_CHECK_IMAGE(dst, w, h, "compiler_box_blur_ref.bmp"); free(dst); } MAKE_UTEST_FROM_FUNCTION(compiler_box_blur_image); Release_v0.3/utests/compiler_byte_scatter.cpp000066400000000000000000000010361223142177000216200ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_byte_scatter(void) { const size_t n = 128; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_byte_scatter"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int8_t), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int8_t*)buf_data[0])[i] == (int8_t) i); } MAKE_UTEST_FROM_FUNCTION(compiler_byte_scatter); Release_v0.3/utests/compiler_ceil.cpp000066400000000000000000000021551223142177000200470ustar00rootroot00000000000000#include #include "utest_helper.hpp" static void cpu(int global_id, float *src, float *dst) { dst[global_id] = ceilf(src[global_id]); } void compiler_ceil(void) { const size_t n = 16; float cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_ceil"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); 
OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_ceil); Release_v0.3/utests/compiler_cl_finish.cpp000066400000000000000000000023111223142177000210630ustar00rootroot00000000000000#include "utest_helper.hpp" #include #define T_GET(t) gettimeofday(&t, NULL); #define T_LAPSE(t1, t2) \ ((t2.tv_sec+t2.tv_usec*0.000001) - (t1.tv_sec+t1.tv_usec*0.000001)) static void compiler_cl_finish(void) { const size_t n = 16*1024*1024; struct timeval t1, t2; float t_fin, t_map_w_fin,t_map_wo_fin; // Setup kernel and buffers OCL_CREATE_KERNEL("test_cl_finish"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); // Run the kernel locals[0] = 64; globals[0] = 32 * locals[0]; OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(int), &n); OCL_SET_ARG(3, sizeof(int), &globals[0]); // 1st time map after clFinish OCL_NDRANGE(1); T_GET(t1); OCL_FINISH(); T_GET(t2); t_fin = T_LAPSE(t1, t2); T_GET(t1); OCL_MAP_BUFFER(0); T_GET(t2); t_map_w_fin = T_LAPSE(t1, t2); // 2nd time map without clFinish OCL_NDRANGE(1); T_GET(t1); OCL_MAP_BUFFER(0); T_GET(t2); t_map_wo_fin = T_LAPSE(t1, t2); OCL_ASSERT(t_fin > t_map_w_fin && t_map_wo_fin > t_map_w_fin); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_cl_finish); Release_v0.3/utests/compiler_clz_int.cpp000066400000000000000000000014131223142177000205710ustar00rootroot00000000000000#include 
"utest_helper.hpp" void compiler_clz_int(void) { const int n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_clz_int"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); ((int*)buf_data[0])[0] = 0; for (int32_t i = 1; i < (int32_t) n; ++i) ((int*)buf_data[0])[i] = 0xffffffffu >> i; OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); OCL_ASSERT(((int*)buf_data[1])[0] == 32); for (int i = 1; i < n; ++i) OCL_ASSERT(((int*)buf_data[1])[i] == i); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_clz_int); Release_v0.3/utests/compiler_clz_short.cpp000066400000000000000000000014631223142177000211430ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_clz_short(void) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_clz_short"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); ((short*)buf_data[0])[0] = 0; for (int32_t i = 1; i < (int32_t) n; ++i) ((short*)buf_data[0])[i] = 0xffffu >> i; OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); OCL_ASSERT(((short*)buf_data[1])[0] == 16); for (unsigned i = 1; i < (unsigned) n; ++i) OCL_ASSERT(((short*)buf_data[1])[i] == (short)i); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_clz_short); Release_v0.3/utests/compiler_convert_uchar_sat.cpp000066400000000000000000000022401223142177000226370ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, float *src, int *dst) { float f = src[global_id]; dst[global_id] = f > 255 ? 255 : f < 0 ? 
0 : f; } void compiler_convert_uchar_sat(void) { const size_t n = 16; float cpu_src[16]; int cpu_dst[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_convert_uchar_sat"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = (rand() & 1023) / 2; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_convert_uchar_sat); Release_v0.3/utests/compiler_copy_buffer.cpp000066400000000000000000000016201223142177000214320ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_copy_buffer(void) { const size_t n = 8192 * 4; // Setup kernel and buffers OCL_CREATE_KERNEL("test_copy_buffer"); //OCL_CREATE_KERNEL("compiler_array"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]); } MAKE_UTEST_FROM_FUNCTION(compiler_copy_buffer); 
Release_v0.3/utests/compiler_copy_buffer_row.cpp000066400000000000000000000022531223142177000223240ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_copy_buffer_row(void) { uint32_t *src_buffer = NULL; int *data_buffer = NULL; const int row = 8192; const int row_n = 2; const int n = row * row_n; // Setup kernel and buffers OCL_CREATE_KERNEL("test_copy_buffer_row"); src_buffer = (uint32_t *) malloc(sizeof(uint32_t) * n); for (int32_t i = 0; i < n; ++i) src_buffer[i] = i; data_buffer = (int *) malloc(sizeof(int) * 2); data_buffer[0] = row; data_buffer[1] = n; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), src_buffer); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[2], CL_MEM_COPY_HOST_PTR, 2 * sizeof(uint32_t), data_buffer); free(src_buffer); free(data_buffer); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check results OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]); } MAKE_UTEST_FROM_FUNCTION(compiler_copy_buffer_row); Release_v0.3/utests/compiler_copy_image.cpp000066400000000000000000000025131223142177000212450ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_copy_image(void) { const size_t w = 512; const size_t h = 512; cl_image_format format; cl_sampler sampler; // Setup kernel and images OCL_CREATE_KERNEL("test_copy_image"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) ((uint32_t*)buf_data[0])[j * w + i] = j * w + i; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w * sizeof(uint32_t), buf_data[0]); OCL_CREATE_IMAGE2D(buf[1], 0, 
&format, w, h, 0, NULL); OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(sampler), &sampler); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == ((uint32_t*)buf_data[1])[j * w + i]); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_copy_image); Release_v0.3/utests/compiler_copy_image1.cpp000066400000000000000000000041041223142177000213240ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_copy_image1(void) { const size_t w = 512; const size_t h = 512; cl_image_format format; cl_sampler sampler; // Setup kernel and images OCL_CREATE_KERNEL("test_copy_image1"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) ((uint32_t*)buf_data[0])[j * w + i] = j * w + i; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w * sizeof(uint32_t), buf_data[0]); OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST); OCL_CREATE_IMAGE2D(buf[1], 0, &format, w, h, 0, NULL); OCL_CREATE_IMAGE2D(buf[2], 0, &format, w, h, 0, NULL); OCL_CREATE_IMAGE2D(buf[3], 0, &format, w, h, 0, NULL); OCL_CREATE_IMAGE2D(buf[4], 0, &format, w, h, 0, NULL); OCL_CREATE_IMAGE2D(buf[5], 0, &format, w, h, 0, NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(sampler), &sampler); OCL_SET_ARG(3, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(4, sizeof(cl_mem), &buf[3]); OCL_SET_ARG(5, 
sizeof(cl_mem), &buf[4]); OCL_SET_ARG(6, sizeof(cl_mem), &buf[5]); float w_inv = 1.0/w; float h_inv = 1.0/h; OCL_SET_ARG(7, sizeof(float), &w_inv); OCL_SET_ARG(8, sizeof(float), &h_inv); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); OCL_MAP_BUFFER(3); OCL_MAP_BUFFER(4); OCL_MAP_BUFFER(5); for(uint32_t k = 0; k < 5; k++) { for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == ((uint32_t*)buf_data[1 + k])[j * w + i]); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(3); OCL_UNMAP_BUFFER(4); OCL_UNMAP_BUFFER(5); } MAKE_UTEST_FROM_FUNCTION(compiler_copy_image1); Release_v0.3/utests/compiler_copy_image_3d.cpp000066400000000000000000000036511223142177000216370ustar00rootroot00000000000000#include "utest_helper.hpp" #include "string.h" static void compiler_copy_image_3d(void) { const size_t w = 512; const size_t h = 512; const size_t depth = 4; cl_image_format format; cl_sampler sampler; // Setup kernel and images OCL_CREATE_KERNEL("test_copy_image_3d"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h * depth); for (uint32_t k = 0; k < depth; k++) for (uint32_t j = 0; j < h; j++) for (uint32_t i = 0; i < w; i++) ((float*)buf_data[0])[k*w*h + j*w + i] = (k << 10) + (j << 10) + i; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNORM_INT8; OCL_CREATE_IMAGE3D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, depth, w*4, w*h*4, buf_data[0]); OCL_CREATE_IMAGE3D(buf[1], 0, &format, w, h, depth, 0, 0, NULL); for(uint32_t i = 0; i < depth; i++) OCL_CREATE_IMAGE2D(buf[2 + i], 0, &format, w, h, 0, NULL); OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(sampler), &sampler); 
for(uint32_t i = 0; i < depth; i++) OCL_SET_ARG(3 + i, sizeof(cl_mem), &buf[2 + i]); globals[0] = w; globals[1] = h; globals[2] = depth; locals[0] = 64; locals[1] = 1; locals[2] = 1; OCL_NDRANGE(3); // Check result for(uint32_t i = 0; i < depth + 2; i++) OCL_MAP_BUFFER_GTT(i); for (uint32_t k = 0; k < depth; k++) for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) { OCL_ASSERT(((float*)buf_data[0])[k*w*((h+1)&-2LL) + j*w + i] == ((float*)buf_data[1])[k*w*((h+1)&-2LL) + j*w + i]); OCL_ASSERT(((float*)buf_data[0])[k*w*((h+1)&-2LL) + j*w + i] == ((float*)buf_data[k + 2])[j * w + i]); } for(uint32_t i = 0; i < depth + 2; i++) OCL_UNMAP_BUFFER_GTT(i); } MAKE_UTEST_FROM_FUNCTION(compiler_copy_image_3d); Release_v0.3/utests/compiler_data_types.cpp000066400000000000000000000002351223142177000212650ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_data_types(void) { OCL_CREATE_KERNEL("compiler_data_types"); } MAKE_UTEST_FROM_FUNCTION(compiler_data_types); Release_v0.3/utests/compiler_degrees.cpp000066400000000000000000000014021223142177000205430ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_degrees(void) { const int n = 32; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_degrees"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); for (int i = 0; i < n; ++i) { src[i] = ((float *)buf_data[0])[i] = rand() * 0.01f; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { OCL_ASSERT(((float *)buf_data[1])[i] == src[i] * (180 / 3.141592653589793F)); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_degrees); Release_v0.3/utests/compiler_displacement_map_element.cpp000066400000000000000000000033321223142177000241470ustar00rootroot00000000000000#include "utest_helper.hpp" 
typedef unsigned int uint; constexpr int W = 16, H = 16; constexpr int SIZE = W * H; uint in_1[SIZE]; uint disp_map[SIZE]; uint out_1[SIZE]; uint cpu(const int cx, const int cy, const uint *in, const uint *disp_map, int w, int h) { uint c = disp_map[cy * w + cx]; int x_pos = cx + c; int y_pos = cy + c; if(0 <= x_pos && x_pos < w && 0 <= y_pos && y_pos < h) return in[y_pos * w + x_pos]; else return 0; } void test() { OCL_MAP_BUFFER(2); for(int y=0; y #include "utest_helper.hpp" static void cpu(int global_id, double *src, double *dst) { double f = src[global_id]; double d = 1.234567890123456789; dst[global_id] = global_id < 14 ? (d * (f + d)) : 14; } void compiler_double(void) { const size_t n = 16; double cpu_dst[n], cpu_src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_double"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 1; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((double*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_double); Release_v0.3/utests/compiler_double_2.cpp000066400000000000000000000023351223142177000206260ustar00rootroot00000000000000#include #include "utest_helper.hpp" static void cpu(int global_id, float *src, double *dst) { float f = src[global_id]; float d = 1.234567890123456789; dst[global_id] = global_id < 14 ? 
d * (d + f) : 14; } void compiler_double_2(void) { const size_t n = 16; float cpu_src[n]; double cpu_dst[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_double_2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 1; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_double_2); Release_v0.3/utests/compiler_double_3.cpp000066400000000000000000000022671223142177000206330ustar00rootroot00000000000000#include #include "utest_helper.hpp" static void cpu(int global_id, float *src, double *dst) { float d = 1.234567890123456789; dst[global_id] = global_id < 14 ? 
d : 14; } void compiler_double_3(void) { const size_t n = 16; float cpu_src[n]; double cpu_dst[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_double_3"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 1; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_double_3); Release_v0.3/utests/compiler_double_4.cpp000066400000000000000000000021261223142177000206260ustar00rootroot00000000000000#include #include "utest_helper.hpp" void compiler_double_4(void) { const size_t n = 16; double cpu_src1[n], cpu_src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_double_4"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; // Run random tests OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { cpu_src1[i] = ((double*)buf_data[0])[i] = rand() * 1e-2; cpu_src2[i] = ((double*)buf_data[1])[i] = rand() * 1e-2; } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(fabs(((double*)buf_data[2])[i] - cpu_src1[i] 
- cpu_src2[i]) < 1e-4); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_double_4); Release_v0.3/utests/compiler_fabs.cpp000066400000000000000000000021721223142177000200450ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, float *src, float *dst) { float f = src[global_id]; f = f < 0 ? -f : f; dst[global_id] = f; } void compiler_fabs(void) { const size_t n = 16; float cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_fabs"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_fabs); Release_v0.3/utests/compiler_fill_gl_image.cpp000066400000000000000000000045311223142177000217050ustar00rootroot00000000000000#include "utest_helper.hpp" static void read_back(int tex, int width, int height, uint32_t * resultColor) { float vertices[8] = {-1, 1, 1, 1, 1, -1, -1, -1}; float tex_coords[8] = {0, 0, 1, 0, 1, 1, 0, 1}; glBindTexture(GL_TEXTURE_2D, tex); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glEnable(GL_TEXTURE_2D); glDisable(GL_BLEND); glVertexPointer(2, GL_FLOAT, sizeof(float) * 2, vertices); 
glEnableClientState(GL_VERTEX_ARRAY); glClientActiveTexture(GL_TEXTURE0); glTexCoordPointer(2, GL_FLOAT, sizeof(float) * 2, tex_coords); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glDrawArrays(GL_TRIANGLE_FAN, 0, 4); glFlush(); OCL_SWAP_EGL_BUFFERS(); glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, resultColor); } static void compiler_fill_gl_image(void) { const size_t w = EGL_WINDOW_WIDTH; const size_t h = EGL_WINDOW_HEIGHT; uint32_t color = 0x123456FF; uint32_t *resultColor; GLuint tex; if (eglContext == EGL_NO_CONTEXT) { fprintf(stderr, "There is no valid egl context. Ignore this case.\n"); return; } // Setup kernel and images glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D, tex); // Must set the all filters to GL_NEAREST! glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, NULL); OCL_CREATE_KERNEL("test_fill_gl_image"); OCL_CREATE_GL_IMAGE2D(buf[0], 0, GL_TEXTURE_2D, 0, tex); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(color), &color); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; glFinish(); OCL_ENQUEUE_ACQUIRE_GL_OBJECTS(0); OCL_NDRANGE(2); OCL_FLUSH(); // Check result resultColor = new uint32_t[w * h * 4]; if (resultColor == NULL) assert(0); read_back(tex, w, h, resultColor); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(resultColor[j * w + i] == color); OCL_UNMAP_BUFFER(0); delete resultColor; } MAKE_UTEST_FROM_FUNCTION(compiler_fill_gl_image); Release_v0.3/utests/compiler_fill_image.cpp000066400000000000000000000015151223142177000212220ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_fill_image(void) { const size_t w = 512; const size_t h = 512; uint32_t color = 0x12345678; cl_image_format format; format.image_channel_order = CL_RGBA; 
format.image_channel_data_type = CL_UNSIGNED_INT8; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image"); OCL_CREATE_IMAGE2D(buf[0], 0, &format, w, h, 0, NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(color), &color); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); // Check result OCL_MAP_BUFFER(0); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == 0x78563412); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image); Release_v0.3/utests/compiler_fill_image0.cpp000066400000000000000000000014231223142177000213000ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_fill_image0(void) { const size_t w = 512; const size_t h = 512; cl_image_format format; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image0"); OCL_CREATE_IMAGE2D(buf[0], 0, &format, w, h, 0, NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); // Check result OCL_MAP_BUFFER_GTT(0); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == (i << 16 | j)); OCL_UNMAP_BUFFER_GTT(0); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image0); Release_v0.3/utests/compiler_fill_image_3d.cpp000066400000000000000000000017241223142177000216120ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_fill_image_3d(void) { const size_t w = 512; const size_t h = 512; const size_t depth = 5; uint32_t color = 0x12345678; cl_image_format format; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image_3d"); OCL_CREATE_IMAGE3D(buf[0], 0, &format, w, h, depth, 0, 0, NULL); // Run the kernel 
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(color), &color); globals[0] = w; globals[1] = h; globals[2] = depth; locals[0] = 16; locals[1] = 16; locals[2] = 1; OCL_NDRANGE(3); // Check result OCL_MAP_BUFFER(0); for (uint32_t k = 0; k < depth; k++) for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d); Release_v0.3/utests/compiler_fill_image_3d_2.cpp000066400000000000000000000016321223142177000220310ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_fill_image_3d_2(void) { const size_t w = 512; const size_t h = 512; const size_t depth = 5; cl_image_format format; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image_3d_2"); OCL_CREATE_IMAGE3D(buf[0], 0, &format, w, h, depth, 0, 0, NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = w; globals[1] = h; globals[2] = depth; locals[0] = 16; locals[1] = 16; locals[2] = 1; OCL_NDRANGE(3); // Check result OCL_MAP_BUFFER_GTT(0); for (uint32_t k = 0; k < depth; k++) for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412); OCL_UNMAP_BUFFER_GTT(0); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d_2); Release_v0.3/utests/compiler_function_argument.cpp000066400000000000000000000011311223142177000226530ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_function_argument(void) { const size_t n = 2048; const int value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_argument"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(int), &value); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // 
Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[0])[i] == value); } MAKE_UTEST_FROM_FUNCTION(compiler_function_argument); Release_v0.3/utests/compiler_function_argument0.cpp000066400000000000000000000011371223142177000227410ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_function_argument0(void) { const size_t n = 2048; const short value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_argument0"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(short), &value); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[0])[i] == value); } MAKE_UTEST_FROM_FUNCTION(compiler_function_argument0); Release_v0.3/utests/compiler_function_argument1.cpp000066400000000000000000000013651223142177000227450ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_function_argument1(void) { const size_t n = 2048; const char value = 34; const short value0 = 31; const int value1 = 3; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_argument1"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(char), &value); OCL_SET_ARG(2, sizeof(short), &value0); OCL_SET_ARG(3, sizeof(int), &value1); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[0])[i] == value + value0 + value1); } MAKE_UTEST_FROM_FUNCTION(compiler_function_argument1); Release_v0.3/utests/compiler_function_argument2.cpp000066400000000000000000000030461223142177000227440ustar00rootroot00000000000000#include "utest_helper.hpp" #define VECSIZE 8 void compiler_function_argument2(void) { char arg0[8] = { 0 }; unsigned char arg1[8] = { 0 }; short arg2[8] = { 0 }; unsigned short 
arg3[8] = { 0 }; int arg4[8] = { 0 }; unsigned int arg5[8] = { 0 }; float arg6[8] = { 0 }; for (uint32_t i = 0; i < 8; ++i) { arg0[i] = rand(); arg1[i] = rand(); arg2[i] = rand(); arg3[i] = rand(); arg4[i] = rand(); arg5[i] = rand(); arg6[i] = rand(); } // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_argument2"); OCL_CREATE_BUFFER(buf[0], 0, sizeof(float) * 8 * 8, NULL); OCL_SET_ARG(0, sizeof(arg0), arg0); OCL_SET_ARG(1, sizeof(arg1), arg1); OCL_SET_ARG(2, sizeof(arg2), arg2); OCL_SET_ARG(3, sizeof(arg3), arg3); OCL_SET_ARG(4, sizeof(arg4), arg4); OCL_SET_ARG(5, sizeof(arg5), arg5); OCL_SET_ARG(6, sizeof(arg6), arg6); OCL_SET_ARG(7, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = 1; locals[0] = 1; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); /* Check results */ float *dst = (float*)buf_data[0]; for (uint32_t i = 0; i < 8; ++i) { OCL_ASSERT((float)arg0[i] == dst[0*8 + i]); OCL_ASSERT((float)arg1[i] == dst[1*8 + i]); OCL_ASSERT((float)arg2[i] == dst[2*8 + i]); OCL_ASSERT((float)arg3[i] == dst[3*8 + i]); OCL_ASSERT((float)arg4[i] == dst[4*8 + i]); OCL_ASSERT((float)arg5[i] == dst[5*8 + i]); OCL_ASSERT((float)arg6[i] == dst[6*8 + i]); } OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_function_argument2); Release_v0.3/utests/compiler_function_constant.cpp000066400000000000000000000015401223142177000226660ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_function_constant(void) { const size_t n = 2048; const uint32_t value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_constant"); OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(uint32_t), &value); OCL_MAP_BUFFER(0); for(uint32_t i = 0; i < 69; ++i) ((short *)buf_data[0])[i] = i; OCL_UNMAP_BUFFER(0); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); // 
Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t *)buf_data[1])[i] == (value + i%69)); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_function_constant); Release_v0.3/utests/compiler_function_constant0.cpp000066400000000000000000000020351223142177000227460ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_function_constant0(void) { const size_t n = 2048; const uint32_t value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_constant0"); OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(int32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, 1 * sizeof(char), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(uint32_t), &value); OCL_MAP_BUFFER(0); for(uint32_t i = 0; i < 69; ++i) ((int32_t *)buf_data[0])[i] = i; OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); ((char *)buf_data[1])[0] = 15; OCL_UNMAP_BUFFER(1); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(2); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t *)buf_data[2])[i] == (value + 15 + i%69)); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_function_constant0); Release_v0.3/utests/compiler_function_constant1.cpp000066400000000000000000000022151223142177000227470ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_function_constant1(void) { const size_t n = 2048; const uint32_t value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_constant"); OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(uint32_t), &value); OCL_MAP_BUFFER(0); for(uint32_t i = 0; i < 69; ++i) ((short *)buf_data[0])[i] = i; OCL_UNMAP_BUFFER(0); // Run the kernel globals[0] 
= n; locals[0] = 16; OCL_NDRANGE(1); OCL_CREATE_BUFFER(buf[2], 0, 101 * sizeof(short), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[2]); OCL_MAP_BUFFER(2); for(uint32_t i = 0; i < 69; ++i) ((short *)buf_data[2])[i] = 2*i; OCL_UNMAP_BUFFER(2); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t *)buf_data[1])[i] == (value + (i%69)*2)); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_function_constant1); Release_v0.3/utests/compiler_function_qualifiers.cpp000066400000000000000000000002711223142177000232010ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_function_qualifiers(void) { OCL_CREATE_KERNEL("compiler_function_qualifiers"); } MAKE_UTEST_FROM_FUNCTION(compiler_function_qualifiers); Release_v0.3/utests/compiler_geometric_builtin.cpp000066400000000000000000000002621223142177000226340ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_geometric_builtin(void) { OCL_CREATE_KERNEL("compiler_geometric_builtin"); } MAKE_UTEST_FROM_FUNCTION(compiler_geometric_builtin); Release_v0.3/utests/compiler_get_image_info.cpp000066400000000000000000000021071223142177000220640ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_get_image_info(void) { const size_t w = 256; const size_t h = 512; const size_t depth = 3; cl_image_format format; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; // Setup kernel and images OCL_CREATE_KERNEL("test_get_image_info"); OCL_CREATE_IMAGE3D(buf[0], 0, &format, w, h, depth, 0, 0, NULL); OCL_CREATE_BUFFER(buf[1], 0, 32 * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, 32 * sizeof(int), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = 32; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); 
for (uint32_t i = 0; i < 32; i++) { OCL_ASSERT(((uint32_t*)buf_data[1])[i] == ((w << 20) | (h << 8) | depth)); OCL_ASSERT(((uint32_t*)buf_data[2])[i] == ((CL_UNSIGNED_INT8 << 16) | CL_RGBA)); } OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_get_image_info); Release_v0.3/utests/compiler_global_constant.cpp000066400000000000000000000056451223142177000223130ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_global_constant(void) { const size_t n = 2048; const uint32_t e = 34, r = 77; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_global_constant"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(uint32_t), &e); OCL_SET_ARG(2, sizeof(uint32_t), &r); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); unsigned int m[3] = {71,72,73}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], m[i%3] + e + r); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == m[i%3] + e + r); OCL_UNMAP_BUFFER(0); } void compiler_global_constant1(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant", "compiler_global_constant1"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); uint32_t data1[] = {1, 4, 7}; uint32_t data2[]= {3, 7, 11}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], data1[i%3] + data2[i%3]); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == data1[i%3] + data2[i%3]); OCL_UNMAP_BUFFER(0); } void compiler_global_constant2(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant", "compiler_global_constant2"); OCL_CREATE_BUFFER(buf[0], 0, n * 
sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], 6); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == 6); OCL_UNMAP_BUFFER(0); } void compiler_global_constant3(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant", "compiler_global_constant3"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); uint32_t data1[] = {3, 6, 9}; char data2[]= {'c', 'f', 'j'}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], data1[i%3] + (int)data2[i%3]); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == data1[i%3] + (uint32_t)data2[i%3]); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_global_constant); MAKE_UTEST_FROM_FUNCTION(compiler_global_constant1); MAKE_UTEST_FROM_FUNCTION(compiler_global_constant2); MAKE_UTEST_FROM_FUNCTION(compiler_global_constant3); Release_v0.3/utests/compiler_global_constant_2.cpp000066400000000000000000000032161223142177000225240ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_global_constant_2(void) { const size_t n = 2048; const uint32_t e = 34, r = 77; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_global_constant_2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(uint32_t), &e); OCL_SET_ARG(2, sizeof(uint32_t), &r); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); unsigned int m[3] = {0x15b,0x25b,0x35b}; unsigned int t[5] = {0x45b,0x55b,0x65b,0x75b,0x85b}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // std::cout << ((uint32_t 
*)buf_data[0])[i] << std::endl; OCL_ASSERT(((uint32_t *)buf_data[0])[i] == m[i%3] + t[i%5] + e + r); OCL_UNMAP_BUFFER(0); } void compiler_global_constant_2_long(void) { const size_t n = 2048; const uint32_t e = 34, r = 77; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant_2", "compiler_global_constant_2_long"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(uint32_t), &e); OCL_SET_ARG(2, sizeof(uint32_t), &r); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); uint64_t m[3] = {0x15b,0x25b,0xFFFFFFFFF}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // std::cout << ((uint64_t *)buf_data[0])[i] << std::endl; OCL_ASSERT(((uint64_t *)buf_data[0])[i] == m[i%3] + e + r); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_global_constant_2); MAKE_UTEST_FROM_FUNCTION(compiler_global_constant_2_long); Release_v0.3/utests/compiler_global_memory_barrier.cpp000066400000000000000000000014201223142177000234630ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_global_memory_barrier(void) { const size_t n = 16*1024; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_global_memory_barrier"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); // Run the kernel globals[0] = n/2; locals[0] = 256; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results uint32_t *dst = (uint32_t*)buf_data[0]; for (uint32_t i = 0; i < n; i+=locals[0]) for (uint32_t j = 0; j < locals[0]; ++j) OCL_ASSERT(dst[i+j] == locals[0] - 1 -j); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_global_memory_barrier); Release_v0.3/utests/compiler_group_size.cpp000066400000000000000000000067251223142177000213300ustar00rootroot00000000000000#include "utest_helper.hpp" #include struct xyz{ 
unsigned short b; unsigned short e; unsigned int o; }; void compiler_group_size1(void) { const size_t n = 7*32*17; int group_size[] = {7, 17, 32}; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_group_size"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); for(int i = 0; i < 3; i++) { // Run the kernel globals[0] = n; locals[0] = group_size[i]; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == i); OCL_UNMAP_BUFFER(0); } } void compiler_group_size2(void) { const uint32_t n = 4*17*8; int size_x[] = {2, 4, 17}; int size_y[] = {2, 4, 4}; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_group_size"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); for(int i = 0; i < 3; i++) { // Run the kernel globals[0] = 4*17; globals[1] = 8; locals[0] = size_x[i]; locals[1] = size_y[i]; OCL_NDRANGE(2); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == i); OCL_UNMAP_BUFFER(0); } } void compiler_group_size3(void) { const uint32_t n = 4*17*8*4; int size_x[] = {2, 4, 17}; int size_y[] = {2, 4, 4}; int size_z[] = {2, 1, 2}; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_group_size"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); for(int i = 0; i < 3; i++) { // Run the kernel globals[0] = 4*17; globals[1] = 8; globals[2] = 4; locals[0] = size_x[i]; locals[1] = size_y[i]; locals[2] = size_z[i]; OCL_NDRANGE(3); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == i); OCL_UNMAP_BUFFER(0); } } void compiler_group_size4(void) { const size_t n = 16; uint32_t color = 2; uint32_t num = 1; int group_size[] = {1}; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_group_size", "compiler_group_size4"); 
OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(struct xyz), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); for(uint32_t i = 0; i < num; i++) { // Run the kernel OCL_MAP_BUFFER(0); ((struct xyz*)buf_data[0])[0].b = 0; ((struct xyz*)buf_data[0])[0].e = 2; ((struct xyz*)buf_data[0])[0].o = 0; OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); memset(((uint32_t*)buf_data[1]), 0x0, sizeof(uint32_t)*n); OCL_UNMAP_BUFFER(1); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_int), &group_size[i]); OCL_SET_ARG(3, sizeof(cl_int), &color); globals[0] = group_size[i]; locals[0] = group_size[i]; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); // Check results for (uint32_t j = 0; j < n; ++j) { // std::cout <<((uint32_t*)buf_data[1])[j] << " "; if(j >= i && j <= i+2) { OCL_ASSERT(((uint32_t*)buf_data[1])[j] == color); } else { OCL_ASSERT(((uint32_t*)buf_data[1])[j] == 0); } } OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_group_size1); MAKE_UTEST_FROM_FUNCTION(compiler_group_size2); MAKE_UTEST_FROM_FUNCTION(compiler_group_size3); MAKE_UTEST_FROM_FUNCTION(compiler_group_size4); Release_v0.3/utests/compiler_hadd.cpp000066400000000000000000000017121223142177000200310ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_hadd(void) { const int n = 32; int src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_hadd"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((int*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { long long 
a = src1[i]; a += src2[i]; a >>= 1; OCL_ASSERT(((int*)buf_data[2])[i] == (int)a); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_hadd); Release_v0.3/utests/compiler_if_else.cpp000066400000000000000000000035231223142177000205410ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_if_else(void) { const size_t n = 17; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_if_else"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 1); } // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -1; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 2); } // Third control flow for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = 2; for (uint32_t i = 4; i < n; ++i) ((int32_t*)buf_data[0])[i] = -1; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 3; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 1); } OCL_ASSERT(((int32_t*)buf_data[1])[3] == -1); OCL_ASSERT(((int32_t*)buf_data[0])[3] == 1); for (uint32_t i = 4; i < 16; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 2); } } 
MAKE_UTEST_FROM_FUNCTION(compiler_if_else); Release_v0.3/utests/compiler_insert_to_constant.cpp000066400000000000000000000012471223142177000230530ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_insert_to_constant(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_insert_to_constant"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t[4]), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results uint32_t *data = (uint32_t*) buf_data[0]; for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(data[4*i+0] == 0); OCL_ASSERT(data[4*i+1] == 1); OCL_ASSERT(data[4*i+2] == i); OCL_ASSERT(data[4*i+3] == 3); } } MAKE_UTEST_FROM_FUNCTION(compiler_insert_to_constant); Release_v0.3/utests/compiler_insert_vector.cpp000066400000000000000000000006161223142177000220210ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_insert_vector(void) { const size_t n = 2048; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_insert_vector"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int) * 4, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); } MAKE_UTEST_FROM_FUNCTION(compiler_insert_vector); Release_v0.3/utests/compiler_insn_selection_masked_min_max.cpp000066400000000000000000000021431223142177000252000ustar00rootroot00000000000000#include "utest_helper.hpp" #include static void compiler_insn_selection_masked_min_max(void) { const size_t n = 256; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_insn_selection_masked_min_max"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((float*)buf_data[0])[i] = float(i); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, 
sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; float *src = (float*)buf_data[0]; for (uint32_t i = 0; i < n; ++i) { float cpu_dst; if (i % 16 > 5) cpu_dst = std::max(src[i], src[7]); else cpu_dst = std::min(src[i], src[10]); OCL_ASSERT(dst[i] == cpu_dst); } } MAKE_UTEST_FROM_FUNCTION(compiler_insn_selection_masked_min_max) Release_v0.3/utests/compiler_insn_selection_max.cpp000066400000000000000000000017231223142177000230140ustar00rootroot00000000000000#include "utest_helper.hpp" #include static void compiler_insn_selection_max(void) { const size_t n = 8192 * 4; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_insn_selection_max"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((float*)buf_data[0])[i] = float(i); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; float *src = (float*)buf_data[0]; for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(dst[i] == std::max(src[i], src[0])); } } MAKE_UTEST_FROM_FUNCTION(compiler_insn_selection_max) Release_v0.3/utests/compiler_insn_selection_min.cpp000066400000000000000000000017221223142177000230110ustar00rootroot00000000000000#include "utest_helper.hpp" #include static void compiler_insn_selection_min(void) { const size_t n = 8192 * 4; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_insn_selection_min"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((float*)buf_data[0])[i] = float(i); OCL_CREATE_BUFFER(buf[0], 
CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; float *src = (float*)buf_data[0]; for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(dst[i] == std::min(src[i], src[0])); } } MAKE_UTEST_FROM_FUNCTION(compiler_insn_selection_min) Release_v0.3/utests/compiler_integer_builtin.cpp000066400000000000000000000002541223142177000223140ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_integer_builtin(void) { OCL_CREATE_KERNEL("compiler_integer_builtin"); } MAKE_UTEST_FROM_FUNCTION(compiler_integer_builtin); Release_v0.3/utests/compiler_integer_division.cpp000066400000000000000000000022471223142177000224760ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst, int x) { dst[global_id] = src[global_id] / x; } void compiler_integer_division(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; const int x = 7; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_integer_division"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(x), &x); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 1000; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst, x); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 
cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_integer_division); Release_v0.3/utests/compiler_integer_remainder.cpp000066400000000000000000000022501223142177000226120ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst, int x) { dst[global_id] = src[global_id] % x; } void compiler_integer_remainder(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; const int x = 7; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_integer_remainder"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(x), &x); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst, x); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_integer_remainder); Release_v0.3/utests/compiler_load_bool_imm.cpp000066400000000000000000000015061223142177000217260ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_load_bool_imm(void) { const size_t n = 1024; const size_t local_size = 16; const int copiesPerWorkItem = 5; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_load_bool_imm"); OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, local_size*copiesPerWorkItem*sizeof(int), NULL); // 16 x int OCL_SET_ARG(2, sizeof(int), &copiesPerWorkItem); // 16 x int // Run the kernel globals[0] = n; locals[0] = local_size; 
OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results int *dst = (int*)buf_data[0]; for (uint32_t i = 0; i < n * copiesPerWorkItem; i++) OCL_ASSERT(dst[i] == copiesPerWorkItem); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_load_bool_imm); Release_v0.3/utests/compiler_local_memory_barrier.cpp000066400000000000000000000026331223142177000233240ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "utest_helper.hpp" static void compiler_local_memory_barrier(void) { const size_t n = 1024; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_local_memory_barrier"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, 64, NULL); // 16 x int // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results uint32_t *dst = (uint32_t*)buf_data[0]; for (uint32_t i = 0; i < n; i+=16) for (uint32_t j = 0; j < 16; ++j) OCL_ASSERT(dst[i+j] == 15-j); } MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_barrier); Release_v0.3/utests/compiler_local_memory_barrier_2.cpp000066400000000000000000000014451223142177000235450ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_local_memory_barrier_2(void) { const size_t n = 16*1024; globals[0] = n/2; locals[0] = 256; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_local_memory_barrier_2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); //OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, locals[0] * 2 * sizeof(uint32_t), NULL); // Run the kernel OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results uint32_t *dst = (uint32_t*)buf_data[0]; for (uint32_t i = 0; i < n; i+=locals[0]) for (uint32_t j = 0; j < locals[0]; ++j) OCL_ASSERT(dst[i+j] == locals[0] - 1 -j); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_barrier_2); Release_v0.3/utests/compiler_local_memory_barrier_wg64.cpp000066400000000000000000000026531223142177000241750ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "utest_helper.hpp" static void compiler_local_memory_barrier_wg64(void) { const size_t n = 1024; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_local_memory_barrier_wg64"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, 256, NULL); // 64 x int // Run the kernel globals[0] = n; locals[0] = 64; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results uint32_t *dst = (uint32_t*)buf_data[0]; for (uint32_t i = 0; i < n; i+=64) for (uint32_t j = 0; j < 64; ++j) OCL_ASSERT(dst[i+j] == 63-j); } MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_barrier_wg64); Release_v0.3/utests/compiler_local_memory_two_ptr.cpp000066400000000000000000000030251223142177000233700ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "utest_helper.hpp" static void compiler_local_memory_two_ptr(void) { const size_t n = 1024; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_local_memory_two_ptr"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, 64, NULL); // 16 x int OCL_SET_ARG(2, 64, NULL); // 16 x int // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results int32_t *dst = (int32_t*)buf_data[0]; for (int32_t i = 0; i < (int) n; i+=16) for (int32_t j = 0; j < 16; ++j) { const int gid = i + j; const int tid = j; OCL_ASSERT(dst[i+j] == (gid&~0xf) + 15-tid + 15-tid); } } MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_two_ptr); Release_v0.3/utests/compiler_local_slm.cpp000066400000000000000000000020111223142177000210670ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_local_slm(void) { const size_t n = 32; OCL_CREATE_KERNEL_FROM_FILE("compiler_local_slm", "compiler_local_slm"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // std::cout << ((int32_t*)buf_data[0])[i] << std::endl; OCL_ASSERT(((int32_t*)buf_data[0])[i] == (i%16 + 2 + 1+ i/16)); OCL_UNMAP_BUFFER(0); } void compiler_local_slm1(void) { const size_t n = 2; OCL_CREATE_KERNEL_FROM_FILE("compiler_local_slm", "compiler_local_slm1"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = 1; locals[0] = 1; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); uint64_t * ptr = (uint64_t*)buf_data[0]; OCL_ASSERT((ptr[1] -ptr[0]) == 4); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_local_slm); MAKE_UTEST_FROM_FUNCTION(compiler_local_slm1); Release_v0.3/utests/compiler_long.cpp000066400000000000000000000034241223142177000200720ustar00rootroot00000000000000#include #include 
#include #include "utest_helper.hpp" void compiler_long(void) { const size_t n = 16; int64_t src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; // Run random tests src1[0] = -1L, src2[0] = -1L; src1[1] = 0x8000000000000000UL, src2[1] = 0x8000000000000000UL; src1[2] = 0x7FFFFFFFFFFFFFFFL, src2[2] = 1L; src1[3] = 0xFFFFFFFEL, src2[3] = 1L; src1[4] = 0x7FFFFFFFL, src2[4] = 0x80000000L; src1[5] = 0, src2[5] = 0; src1[6] = 0, src2[6] = 1; src1[7] = -2L, src2[7] = -1L; src1[8] = 0, src2[8] = 0x8000000000000000UL; for (int32_t i = 9; i < (int32_t) n; ++i) { src1[i] = ((int64_t)rand() << 32) + rand(); src2[i] = ((int64_t)rand() << 32) + rand(); } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%lx\n", ((int64_t *)buf_data[2])[i]); if (i < 5) OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]); if (i > 5) OCL_ASSERT(src1[i] - src2[i] == ((int64_t *)buf_data[2])[i]); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long); Release_v0.3/utests/compiler_long_2.cpp000066400000000000000000000026531223142177000203160ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" void compiler_long_2(void) { const size_t n = 16; int64_t src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[2], 0, 
n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src1[i] = ((int64_t)rand() << 32) + rand(); src2[i] = ((int64_t)rand() << 32) + rand(); } src1[4] = 1; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(2); int64_t *dest = ((int64_t *)buf_data[2]); //for (int32_t i = 0; i < (int32_t) n; ++i) // printf("%lx\n", dest[i]); OCL_ASSERT(0xFEDCBA9876543210UL == (uint64_t)dest[0]); OCL_ASSERT((src1[1] & src2[1]) == dest[1]); OCL_ASSERT((src1[2] | src2[2]) == dest[2]); OCL_ASSERT((src1[3] ^ src2[3]) == dest[3]); OCL_ASSERT(0x1122334455667788L == dest[4]); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_2); Release_v0.3/utests/compiler_long_asr.cpp000066400000000000000000000017521223142177000207410ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" void compiler_long_asr(void) { const size_t n = 64; int64_t src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_asr"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) src[i] = (int64_t)1 << 63; OCL_MAP_BUFFER(0); memcpy(buf_data[0], src, sizeof(src)); OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); int64_t *dest = ((int64_t *)buf_data[1]); for (int32_t i = 0; i < (int32_t) n; ++i) if (i > 7) OCL_ASSERT(dest[i] == src[i] >> i); else OCL_ASSERT(dest[i] == src[i] + 1); OCL_UNMAP_BUFFER(2); } 
MAKE_UTEST_FROM_FUNCTION(compiler_long_asr); Release_v0.3/utests/compiler_long_cmp.cpp000066400000000000000000000073271223142177000207370ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" void compiler_long_cmp(void) { const size_t n = 16; int64_t src1[n], src2[n]; src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFFFFFFFFFFFFFll; src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1; src1[2] = -1ll, src2[2] = 0; src1[3] = ((int64_t)123 << 32) | 0x7FFFFFFF, src2[3] = ((int64_t)123 << 32) | 0x80000000; src1[4] = 0x7FFFFFFFFFFFFFFFll, src2[4] = (int64_t)1 << 63; src1[5] = ((int64_t)1 << 63) | 1, src2[5] = (int64_t)1 << 63; src1[6] = 0, src2[6] = -1ll; src1[7] = ((int64_t)123 << 32) | 0x80000000, src2[7] = ((int64_t)123 << 32) | 0x7FFFFFFF; for(size_t i=8; i src2[i]) ? 3 : 4; OCL_ASSERT(x == dest[i]); } OCL_UNMAP_BUFFER(2); OCL_CREATE_KERNEL_FROM_FILE("compiler_long_cmp", "compiler_long_cmp_ge"); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { int64_t *dest = (int64_t *)buf_data[2]; int64_t x = (src1[i] >= src2[i]) ? 3 : 4; OCL_ASSERT(x == dest[i]); } OCL_UNMAP_BUFFER(2); OCL_CREATE_KERNEL_FROM_FILE("compiler_long_cmp", "compiler_long_cmp_eq"); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { int64_t *dest = (int64_t *)buf_data[2]; int64_t x = (src1[i] == src2[i]) ? 
3 : 4; OCL_ASSERT(x == dest[i]); } OCL_UNMAP_BUFFER(2); OCL_CREATE_KERNEL_FROM_FILE("compiler_long_cmp", "compiler_long_cmp_neq"); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { int64_t *dest = (int64_t *)buf_data[2]; int64_t x = (src1[i] != src2[i]) ? 3 : 4; OCL_ASSERT(x == dest[i]); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_cmp); Release_v0.3/utests/compiler_long_convert.cpp000066400000000000000000000104411223142177000216270ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" // convert shorter integer to 64-bit integer void compiler_long_convert(void) { const size_t n = 16; char src1[n]; short src2[n]; int src3[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_convert"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[5], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]); OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src1[i] = -i; src2[i] = -i; src3[i] = -i; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2, sizeof(src2)); memcpy(buf_data[2], src3, sizeof(src3)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(3); OCL_MAP_BUFFER(4); OCL_MAP_BUFFER(5); int64_t *dst1 = 
((int64_t *)buf_data[3]); int64_t *dst2 = ((int64_t *)buf_data[4]); int64_t *dst3 = ((int64_t *)buf_data[5]); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%lx %lx %lx\n", dst1[i], dst2[i], dst3[i]); OCL_ASSERT(dst1[i] == -(int64_t)i); OCL_ASSERT(dst2[i] == -(int64_t)i); OCL_ASSERT(dst3[i] == -(int64_t)i); } OCL_UNMAP_BUFFER(3); OCL_UNMAP_BUFFER(4); OCL_UNMAP_BUFFER(5); } MAKE_UTEST_FROM_FUNCTION(compiler_long_convert); // convert 64-bit integer to shorter integer void compiler_long_convert_2(void) { const size_t n = 16; int64_t src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", "compiler_long_convert_2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src[i] = -i; } OCL_MAP_BUFFER(3); memcpy(buf_data[3], src, sizeof(src)); OCL_UNMAP_BUFFER(3); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); char *dst1 = ((char *)buf_data[0]); short *dst2 = ((short *)buf_data[1]); int *dst3 = ((int *)buf_data[2]); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%x %x %x\n", dst1[i], dst2[i], dst3[i]); OCL_ASSERT(dst1[i] == -i); OCL_ASSERT(dst2[i] == -i); OCL_ASSERT(dst3[i] == -i); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_2); // convert 64-bit integer to 32-bit float void compiler_long_convert_to_float(void) { const size_t n = 16; int64_t src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", "compiler_long_convert_to_float"); 
OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src[i] = -(int64_t)i; } OCL_MAP_BUFFER(1); memcpy(buf_data[1], src, sizeof(src)); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); float *dst = ((float *)buf_data[0]); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%f\n", dst[i]); OCL_ASSERT(dst[i] == src[i]); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_to_float); Release_v0.3/utests/compiler_long_mult.cpp000066400000000000000000000024641223142177000211360ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" void compiler_long_mult(void) { const size_t n = 16; int64_t src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_mult"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src1[i] = 0x77665544FFEEDDCCLL; src2[i] = ((int64_t)rand() << 32) + rand(); } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%lx\n", ((int64_t *)buf_data[2])[i]); if (i < 3) OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]); else OCL_ASSERT(src1[i] * src2[i] == ((int64_t *)buf_data[2])[i]); } 
OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_mult); Release_v0.3/utests/compiler_long_shl.cpp000066400000000000000000000017411223142177000207400ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" void compiler_long_shl(void) { const size_t n = 64; int64_t src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_shl"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) src[i] = 1; OCL_MAP_BUFFER(0); memcpy(buf_data[0], src, sizeof(src)); OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); int64_t *dest = ((int64_t *)buf_data[1]); for (int32_t i = 0; i < (int32_t) n; ++i) if (i > 7) OCL_ASSERT(dest[i] == ((int64_t)1) << i); else OCL_ASSERT(dest[i] == src[i] + 1); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_shl); Release_v0.3/utests/compiler_long_shr.cpp000066400000000000000000000017601223142177000207470ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" void compiler_long_shr(void) { const size_t n = 64; uint64_t src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_shr"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) src[i] = (uint64_t)1 << 63; OCL_MAP_BUFFER(0); memcpy(buf_data[0], src, sizeof(src)); OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); uint64_t *dest = ((uint64_t *)buf_data[1]); for (int32_t i = 0; i < (int32_t) n; ++i) if (i > 7) OCL_ASSERT(dest[i] == src[i] >> i); else 
OCL_ASSERT(dest[i] == src[i] + 1); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_shr); Release_v0.3/utests/compiler_lower_return0.cpp000066400000000000000000000027431223142177000217450ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_lower_return0(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_lower_return0"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == i); // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); // Third control flow for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == i); for (int32_t i = 8; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); } MAKE_UTEST_FROM_FUNCTION(compiler_lower_return0); Release_v0.3/utests/compiler_lower_return1.cpp000066400000000000000000000025611223142177000217440ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_lower_return1(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_lower_return1"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t 
i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == i); for (int32_t i = 11; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); // Second control flow for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 4; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); for (int32_t i = 4; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == i); for (int32_t i = 11; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); } MAKE_UTEST_FROM_FUNCTION(compiler_lower_return1); Release_v0.3/utests/compiler_lower_return2.cpp000066400000000000000000000022641223142177000217450ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { const int id = global_id; dst[id] = id; while (dst[id] > src[id]) { if (dst[id] > 10) return; dst[id]--; } dst[id] += 2; } static void compiler_lower_return2(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_lower_return2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel 
on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_lower_return2); Release_v0.3/utests/compiler_mad24.cpp000066400000000000000000000022041223142177000200350ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_mad24(void) { const int n = 32; int src1[n], src2[n], src3[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_mad24"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((int*)buf_data[1])[i] = rand(); src3[i] = ((int*)buf_data[2])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); OCL_MAP_BUFFER(3); for (int i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[3])[i] == ((src1[i] << 8) >> 8) * ((src2[i] << 8) >> 8) + src3[i]); OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(compiler_mad24); Release_v0.3/utests/compiler_mad_hi.cpp000066400000000000000000000022511223142177000203510ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_mad_hi(void) { const int n = 32; int src1[n], src2[n], src3[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_mad_hi"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int), NULL); 
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((int*)buf_data[1])[i] = rand(); src3[i] = ((int*)buf_data[2])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); OCL_MAP_BUFFER(3); for (int i = 0; i < n; ++i) { long long a = src1[i]; a *= src2[i]; a >>= 32; a += src3[i]; OCL_ASSERT(((int*)buf_data[3])[i] == (int)a); } OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(compiler_mad_hi); Release_v0.3/utests/compiler_mandelbrot.cpp000066400000000000000000000030631223142177000212610ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "utest_helper.hpp" static int *dst = NULL; static const size_t w = 256; static const size_t h = 256; static void compiler_mandelbrot(void) { const size_t global[2] = {w, h}; const size_t local[2] = {16, 1}; const size_t sz = w * h * sizeof(char[4]); OCL_CREATE_KERNEL("compiler_mandelbrot"); OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); OCL_MAP_BUFFER(0); dst = (int *) buf_data[0]; /* Save the image (for debug purpose) */ cl_write_bmp(dst, w, h, "compiler_mandelbrot.bmp"); /* Compare with the golden image */ OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot); Release_v0.3/utests/compiler_mandelbrot_alternate.cpp000066400000000000000000000036441223142177000233250ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "utest_helper.hpp" static int *dst = NULL; static const size_t w = 256; static const size_t h = 256; static const float criterium = 4.f; static void compiler_mandelbrot_alternate(void) { const size_t global[2] = {w, h}; const size_t local[2] = {16, 1}; const size_t sz = w * h * sizeof(char[4]); const float rcpWidth = 1.f / float(w); const float rcpHeight = 1.f / float(h); OCL_CREATE_KERNEL("compiler_mandelbrot_alternate"); OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpWidth); OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpHeight); OCL_CALL (clSetKernelArg, kernel, 3, sizeof(float), &criterium); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); OCL_MAP_BUFFER(0); dst = (int *) buf_data[0]; /* Save the image (for debug purpose) */ cl_write_bmp(dst, w, h, "compiler_mandelbrot_alternate.bmp"); /* Compare with the golden image */ OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_alternate_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot_alternate); Release_v0.3/utests/compiler_math.cpp000066400000000000000000000052231223142177000200630ustar00rootroot00000000000000#include "utest_helper.hpp" #include #include static void cpu_compiler_math(float *dst, float *src, int i) { const float x = src[i]; const float PI = 3.141592653589793f; switch (i) { case 0: dst[i] = cosf(x); break; case 1: dst[i] = sinf(x); break; case 2: dst[i] = log2f(x); break; case 3: dst[i] = sqrtf(x); break; case 4: dst[i] = 1.f/ sqrtf(x); break; case 5: dst[i] = 1.f / x; break; case 6: dst[i] = tanf(x); break; case 7: dst[i] = powf(x, 0.3333333333333333333f); break; case 8: dst[i] = ceilf(x); break; case 9: dst[i] = cosf(PI * x); break; case 10: dst[i] = powf(2, x); break; case 11: dst[i] = powf(10, x); break; case 12: dst[i] = expf(x) - 1; break; case 13: dst[i] = logf(x + 1); break; case 14: 
dst[i] = floorf(log2f(x)); break; case 15: dst[i] = sinf(PI * x); break; case 16: dst[i] = tanf(PI * x); break; case 17: dst[i] = 2 * roundf(x / 2); break; case 18: dst[i] = sinhf(x); break; case 19: dst[i] = coshf(x); break; case 20: dst[i] = tanhf(x); break; case 21: dst[i] = asinhf(x); break; case 22: dst[i] = acoshf(x); break; case 23: dst[i] = atanhf(x); break; case 24: dst[i] = asinf(x); break; case 25: dst[i] = acosf(x); break; case 26: dst[i] = atanf(x); break; case 27: dst[i] = asinf(x) / PI; break; case 28: dst[i] = acosf(x) / PI; break; case 29: dst[i] = atanf(x) / PI; break; case 30: dst[i] = erff(x); break; case 31: dst[i] = nanf(""); break; default: dst[i] = 1.f; break; }; } static void compiler_math(void) { const size_t n = 32; float cpu_dst[32], cpu_src[32]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_math"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; int j; for(j = 0; j < 1000; j ++) { OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 32; ++i) cpu_src[i] = ((float*)buf_data[1])[i] = .1f * (rand() & 15); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < 16; ++i) cpu_compiler_math(cpu_dst, cpu_src, i); for (int i = 0; i < 16; ++i) { const float cpu = cpu_dst[i]; const float gpu = ((float*)buf_data[0])[i]; if (isinf(cpu)) OCL_ASSERT(isinf(gpu)); else if (isnan(cpu)) OCL_ASSERT(isnan(gpu)); else OCL_ASSERT(fabs(gpu-cpu) < 1e-3f); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_math) Release_v0.3/utests/compiler_math_2op.cpp000066400000000000000000000045661223142177000206540ustar00rootroot00000000000000#include "utest_helper.hpp" #include #include static float rnde(float v) { if(v - floorf(v) > 0.5f) return floorf(v) + 1; if(v - floorf(v) < 0.5f) return floorf(v); if((int)(floorf(v)) & 
1) return floorf(v) + 1; return floorf(v); } static void cpu_compiler_math(float *dst, float *src1, float *src2, int i) { const float x = src1[i], y = src2[i]; switch (i) { case 0: dst[i] = x / y; break; case 1: dst[i] = x > y ? x - y : 0; break; case 2: dst[i] = fminf(x - floorf(x), 0x1.FFFFFep-1F); break; case 3: dst[i] = sqrtf(x*x + y*y); break; case 4: dst[i] = x * powf(2, (int)y); break; case 5: dst[i] = powf(x, (int)y); break; case 6: dst[i] = x - rnde(x/y)*y; break; case 7: dst[i] = powf(x, 1.f/(int)(y+1)); break; case 8: dst[i] = x * y < 0 ? -x : x; break; case 9: dst[i] = fabsf(x) > fabsf(y) ? x : fabsf(y) > fabsf(x) ? y : fmaxf(x, y); break; case 10: dst[i] = fabsf(x) < fabsf(y) ? x : fabsf(y) < fabsf(x) ? y : fminf(x, y); break; default: dst[i] = 1.f; break; }; } static void compiler_math_2op(void) { const size_t n = 32; float cpu_dst[32], cpu_src1[32], cpu_src2[32]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_math_2op"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = 16; locals[0] = 16; int j; for(j = 0; j < 1000; j ++) { OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); for (uint32_t i = 0; i < 32; ++i) { cpu_src1[i] = ((float*)buf_data[1])[i] = .1f * (rand() & 15); cpu_src2[i] = ((float*)buf_data[2])[i] = .1f * (rand() & 15); } OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); for (int i = 0; i < 16; ++i) cpu_compiler_math(cpu_dst, cpu_src1, cpu_src2, i); OCL_MAP_BUFFER(0); for (int i = 0; i < 16; ++i) { const float cpu = cpu_dst[i]; const float gpu = ((float*)buf_data[0])[i]; if (isinf(cpu)) OCL_ASSERT(isinf(gpu)); else if (isnan(cpu)) OCL_ASSERT(isnan(gpu)); else { OCL_ASSERT(fabs(gpu-cpu) < 1e-3f); } } OCL_UNMAP_BUFFER(0); } } MAKE_UTEST_FROM_FUNCTION(compiler_math_2op) 
Release_v0.3/utests/compiler_math_3op.cpp000066400000000000000000000036041223142177000206450ustar00rootroot00000000000000#include "utest_helper.hpp" #include #include static void cpu_compiler_math(float *dst, float *src1, float *src2, float *src3, int i) { const float x = src1[i], y = src2[i], z = src3[i]; switch (i) { case 0: dst[i] = x * y + z; break; case 1: dst[i] = x * y + z; break; default: dst[i] = 1.f; break; }; } static void compiler_math_3op(void) { const size_t n = 32; float cpu_dst[32], cpu_src1[32], cpu_src2[32], cpu_src3[32]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_math_3op"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = 16; locals[0] = 16; for (int j = 0; j < 1000; j ++) { OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); OCL_MAP_BUFFER(3); for (uint32_t i = 0; i < 32; ++i) { cpu_src1[i] = ((float*)buf_data[1])[i] = .1f * (rand() & 15); cpu_src2[i] = ((float*)buf_data[2])[i] = .1f * (rand() & 15); cpu_src3[i] = ((float*)buf_data[3])[i] = .1f * (rand() & 15); } OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(3); OCL_NDRANGE(1); for (int i = 0; i < 16; ++i) cpu_compiler_math(cpu_dst, cpu_src1, cpu_src2, cpu_src3, i); OCL_MAP_BUFFER(0); for (int i = 0; i < 16; ++i) { const float cpu = cpu_dst[i]; const float gpu = ((float*)buf_data[0])[i]; if (isinf(cpu)) OCL_ASSERT(isinf(gpu)); else if (isnan(cpu)) OCL_ASSERT(isnan(gpu)); else OCL_ASSERT(fabs(gpu-cpu) < 1e-3f); } OCL_UNMAP_BUFFER(0); } } MAKE_UTEST_FROM_FUNCTION(compiler_math_3op) Release_v0.3/utests/compiler_math_builtin.cpp000066400000000000000000000002431223142177000216060ustar00rootroot00000000000000#include "utest_helper.hpp" void 
compiler_math_builtin(void) { OCL_CREATE_KERNEL("compiler_math_builtin"); } MAKE_UTEST_FROM_FUNCTION(compiler_math_builtin); Release_v0.3/utests/compiler_math_constants.cpp000066400000000000000000000002511223142177000221530ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_math_constants(void) { OCL_CREATE_KERNEL("compiler_math_constants"); } MAKE_UTEST_FROM_FUNCTION(compiler_math_constants); Release_v0.3/utests/compiler_mem_fence.cpp000066400000000000000000000003241223142177000210450ustar00rootroot00000000000000/* test OpenCL 1.1 Synchronization, explicit memory fence (section 6.11.9, 6.11.10) */ #include "utest_helper.hpp" void compiler_mem_fence(void) { OCL_CREATE_KERNEL("compiler_mem_fence"); OCL_NDRANGE(1); } Release_v0.3/utests/compiler_movforphi_undef.cpp000066400000000000000000000030301223142177000223160ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_movforphi_undef(void) { const size_t w = 16; const size_t h = 16; cl_sampler sampler; cl_image_format format; // Setup kernel and images OCL_CREATE_KERNEL("test_movforphi_undef"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) ((uint32_t*)buf_data[0])[j * w + i] = j * w + i; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w * sizeof(uint32_t), buf_data[0]); OCL_CREATE_IMAGE2D(buf[1], 0, &format, w, h, 0, NULL); OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(sampler), &sampler); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); // Just compare the initial 2 data is enough for this case, as the initial 2 data must in the 
first // tile box and we can just get the correct coords. for (uint32_t j = 0; j < 1; ++j) for (uint32_t i = 0; i < 3; i++) { if (i == 0) OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i + 1] == ((uint32_t*)buf_data[1])[j * w + i]); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_movforphi_undef); Release_v0.3/utests/compiler_mul24.cpp000066400000000000000000000016641223142177000201020ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_mul24(void) { const int n = 32; int src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_mul24"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((int*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[2])[i] == ((src1[i] << 8) >> 8) * ((src2[i] << 8) >> 8)); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_mul24); Release_v0.3/utests/compiler_mul_hi.cpp000066400000000000000000000017211223142177000204060ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_mul_hi(void) { const int n = 32; int src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_mul_hi"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { 
src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((int*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { long long a = src1[i]; a *= src2[i]; a >>= 32; OCL_ASSERT(((int*)buf_data[2])[i] == (int)a); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_mul_hi); Release_v0.3/utests/compiler_multiple_kernels.cpp000066400000000000000000000003151223142177000225050ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_multiple_kernels(void) { OCL_CREATE_KERNEL_FROM_FILE("compiler_multiple_kernels", "first_kernel"); } MAKE_UTEST_FROM_FUNCTION(compiler_multiple_kernels);Release_v0.3/utests/compiler_preprocessor_macros.cpp000066400000000000000000000002701223142177000232210ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_preprocessor_macros(void) { OCL_CREATE_KERNEL("compiler_preprocessor_macros"); } MAKE_UTEST_FROM_FUNCTION(compiler_preprocessor_macros); Release_v0.3/utests/compiler_program_objects.cpp000066400000000000000000000042551223142177000223160ustar00rootroot00000000000000/* test OpenCL 1.1 Program Objects (section 5.6) * test creating program objects, * build program executable, * build options * query program objects */ #include "utest_helper.hpp" void compiler_program_objects(void) { OCL_CREATE_KERNEL("empty"); // set up global vars OCL_CALL(clRetainProgram, program); OCL_CALL(clReleaseProgram, program); OCL_CALL(clBuildProgram, program, 1, &device, "-Dname -Dname2=def -ldir " "-cl-opt-disable -cl-strict-aliasing -cl-mad-enable -cl-no-signed-zeros " "-cl-finite-math-only -cl-fast-relaxed-math -cl-unsafe-math-optimizations " "-cl-single-precision-constant -cl-denorms-are-zero " "-w -Werror -cl-std=CL1.1", NULL, NULL); const int pi[] = {CL_PROGRAM_REFERENCE_COUNT, CL_PROGRAM_CONTEXT, CL_PROGRAM_NUM_DEVICES, CL_PROGRAM_DEVICES, CL_PROGRAM_SOURCE, CL_PROGRAM_BINARY_SIZES, CL_PROGRAM_BINARIES,}; const int pbi[] = 
{CL_PROGRAM_BUILD_STATUS, CL_PROGRAM_BUILD_OPTIONS, CL_PROGRAM_BUILD_LOG,}; char param_value[1024]; size_t pv_size; int i; for(i=0; i>= 1; OCL_ASSERT(((int*)buf_data[2])[i] == (int)a); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_rhadd); Release_v0.3/utests/compiler_rotate.cpp000066400000000000000000000017361223142177000204350ustar00rootroot00000000000000#include "utest_helper.hpp" int cpu(int src, int y) { return (src << y) | (src >> (32 - y)); } void compiler_rotate(void) { const int n = 32; int src[n], y[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_rotate"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { src[i] = ((int*)buf_data[0])[i] = rand(); y[i] = ((int*)buf_data[2])[i] = rand() & 31; } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[1])[i] == cpu(src[i], y[i])); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_rotate); Release_v0.3/utests/compiler_sampler.cpp000066400000000000000000000022151223142177000205730ustar00rootroot00000000000000/* test OpenCL 1.1 Sampler Objects (section 5.5) */ #include "utest_helper.hpp" void compiler_sampler(void) { OCL_CREATE_KERNEL("compiler_sampler"); OCL_ASSERT(ctx != 0); cl_sampler s; cl_int err; int a1[] = {CL_TRUE, CL_FALSE}, a2[] = {CL_ADDRESS_MIRRORED_REPEAT, CL_ADDRESS_REPEAT, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_NONE}, a3[] = {CL_FILTER_NEAREST, CL_FILTER_LINEAR}, a4[] = {CL_SAMPLER_REFERENCE_COUNT, CL_SAMPLER_CONTEXT, CL_SAMPLER_NORMALIZED_COORDS, CL_SAMPLER_ADDRESSING_MODE, CL_SAMPLER_FILTER_MODE}; char pv[1000]; size_t pv_size; int i, j, k, l; 
for(i=0; i<2; i++) for(j=0; j<5; j++) for(k=0; k<2; k++) { s = clCreateSampler(ctx, a1[i], a2[j], a3[k], &err); OCL_ASSERT(err == CL_SUCCESS); OCL_CALL(clRetainSampler, s); OCL_CALL(clReleaseSampler, s); for(l=0; l<5; l++) OCL_CALL(clGetSamplerInfo, s, a4[l], 1000, pv, &pv_size); OCL_CALL(clReleaseSampler, s); } } MAKE_UTEST_FROM_FUNCTION(compiler_sampler); Release_v0.3/utests/compiler_saturate.cpp000066400000000000000000000106331223142177000207630ustar00rootroot00000000000000#include "utest_helper.hpp" namespace { constexpr int n = 16; // declaration only, we should create each template specification for each type. template T get_data(int idx, int part); /* the format of test data is as follows: * the first column is A * the second column is B * the third column is the expected result. */ #define DEF_TEMPLATE(TYPE, NAME) \ template <> \ TYPE get_data(int idx, int part) \ { \ static TYPE test_data[n][3] = { \ { 0, 0, 0 }, \ { 0, 1, 1 }, \ { 0, 2, 2 }, \ { -1, 1, 0 }, \ { 1, -2, -1 }, \ { 0, 110, 110 }, \ { -10, -10, -20 }, \ { CL_##NAME##_MIN, CL_##NAME##_MIN, CL_##NAME##_MIN }, \ { CL_##NAME##_MIN, CL_##NAME##_MAX, -1 }, \ { CL_##NAME##_MAX, 0, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX, 1, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX, 2, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX, CL_##NAME##_MAX, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX/2, CL_##NAME##_MAX/2, CL_##NAME##_MAX-1 }, \ { CL_##NAME##_MAX/2, CL_##NAME##_MAX/2+1, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX/2+1, CL_##NAME##_MAX/2+1, CL_##NAME##_MAX } \ }; \ return test_data[idx][part]; \ } \ \ template <> \ u##TYPE get_data(int idx, int part) \ { \ static u##TYPE test_data[n][3] = { \ { 0, 0, 0 }, \ { CL_U##NAME##_MAX, 0, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX, 1, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX, 2, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX, CL_U##NAME##_MAX, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX/2, CL_U##NAME##_MAX/2, CL_U##NAME##_MAX-1 }, \ { CL_U##NAME##_MAX/2, CL_U##NAME##_MAX/2+1, CL_U##NAME##_MAX }, \ { 
CL_U##NAME##_MAX/2+1, CL_U##NAME##_MAX/2+1, CL_U##NAME##_MAX }\ }; \ return test_data[idx][part]; \ } DEF_TEMPLATE(int8_t, CHAR) DEF_TEMPLATE(int16_t, SHRT) DEF_TEMPLATE(int32_t, INT) //DEF_TEMPLATE(int64_t, LONG) template void test(const char *kernel_name) { T C[n] = { 0 }; T A[n] = { 0 }; T B[n] = { 0 }; for (int i = 0; i < n; i++) { A[i] = get_data(i, 0); B[i] = get_data(i, 1); } OCL_CREATE_KERNEL_FROM_FILE("compiler_saturate", kernel_name); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &C[0]); OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &A[0]); OCL_CREATE_BUFFER(buf[2], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &B[0]); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = n; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); for (int i = 0; i < n; i++) { OCL_ASSERT(((T*)buf_data[0])[i] == get_data(i, 2)); } OCL_UNMAP_BUFFER(0); } } #define compiler_saturate(type, kernel) \ static void compiler_saturate_ ##type(void)\ {\ test(# kernel);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_saturate_ ## type); compiler_saturate(int8_t, test_char) compiler_saturate(uint8_t, test_uchar) compiler_saturate(int16_t, test_short) compiler_saturate(uint16_t, test_ushort) compiler_saturate(int32_t, test_int) compiler_saturate(uint32_t, test_uint) //compiler_saturate(int64_t, test_long) //compiler_saturate(uint64_t, test_ulong) Release_v0.3/utests/compiler_saturate_sub.cpp000066400000000000000000000110451223142177000216320ustar00rootroot00000000000000#include "utest_helper.hpp" namespace { constexpr int n = 16; // declaration only, we should create each template specification for each type. template T get_data(int idx, int part); /* the format of test data is as follows: * the first column is A * the second column is B * the third column is the expected result. 
*/ #define DEF_TEMPLATE(TYPE, NAME) \ template <> \ TYPE get_data(int idx, int part) \ { \ static TYPE test_data[n][3] = { \ { 0, 0, 0 }, \ { 0, 1, -1 }, \ { CL_##NAME##_MIN, CL_##NAME##_MIN, 0 }, \ { CL_##NAME##_MAX, CL_##NAME##_MAX, 0 }, \ { -2, CL_##NAME##_MIN, CL_##NAME##_MAX-1 }, \ { -1, CL_##NAME##_MIN, CL_##NAME##_MAX }, \ { 0, CL_##NAME##_MIN, CL_##NAME##_MAX }, \ { 1, CL_##NAME##_MIN, CL_##NAME##_MAX }, \ { -2, CL_##NAME##_MAX, CL_##NAME##_MIN }, \ { -1, CL_##NAME##_MAX, CL_##NAME##_MIN }, \ { 0, CL_##NAME##_MAX, -CL_##NAME##_MAX }, \ { 1, CL_##NAME##_MAX, -CL_##NAME##_MAX+1 }, \ { CL_##NAME##_MIN, CL_##NAME##_MAX, CL_##NAME##_MIN }, \ { CL_##NAME##_MIN, 1, CL_##NAME##_MIN }, \ { CL_##NAME##_MIN, -1, CL_##NAME##_MIN+1 }, \ { CL_##NAME##_MAX, CL_##NAME##_MIN, CL_##NAME##_MAX }, \ }; \ return test_data[idx][part]; \ } \ \ template <> \ u##TYPE get_data(int idx, int part) \ { \ static u##TYPE test_data[n][3] = { \ { 0, 0, 0 }, \ { 0, 1, 0 }, \ { 1, 1, 0 }, \ { 1, 0, 1 }, \ { CL_U##NAME##_MAX, CL_U##NAME##_MAX, 0 }, \ { 0, CL_U##NAME##_MAX, 0 }, \ { 1, CL_U##NAME##_MAX, 0 }, \ { CL_U##NAME##_MAX, 0, CL_U##NAME##_MAX }, \ }; \ return test_data[idx][part]; \ } DEF_TEMPLATE(int8_t, CHAR) DEF_TEMPLATE(int16_t, SHRT) DEF_TEMPLATE(int32_t, INT) //DEF_TEMPLATE(int64_t, LONG) template void test(const char *kernel_name) { T C[n] = { 0 }; T A[n] = { 0 }; T B[n] = { 0 }; for (int i = 0; i < n; i++) { A[i] = get_data(i, 0); B[i] = get_data(i, 1); } OCL_CREATE_KERNEL_FROM_FILE("compiler_saturate_sub", kernel_name); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &C[0]); OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &A[0]); OCL_CREATE_BUFFER(buf[2], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &B[0]); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = n; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); for (int i = 0; i < n; i++) { OCL_ASSERT(((T*)buf_data[0])[i] == 
get_data(i, 2)); } OCL_UNMAP_BUFFER(0); } } #define compiler_saturate_sub(type, kernel) \ static void compiler_saturate_sub_ ##type(void)\ {\ test(# kernel);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_saturate_sub_ ## type); compiler_saturate_sub(int8_t, test_char) compiler_saturate_sub(uint8_t, test_uchar) compiler_saturate_sub(int16_t, test_short) compiler_saturate_sub(uint16_t, test_ushort) //compiler_saturate_sub(int32_t, test_int) // TODO due to the possible hardware bug, we disable this, uncomment it when it's done. compiler_saturate_sub(uint32_t, test_uint) //compiler_saturate_sub(int64_t, test_long) //compiler_saturate_sub(uint64_t, test_ulong) Release_v0.3/utests/compiler_shader_toy.cpp000066400000000000000000000067251223142177000213030ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* This is a super simple wrapper for the OpenCL kernels I ported from GLSL code * taken in Inigo's web site: * http://www.iquilezles.org/apps/shadertoy/index.html * * They are pretty cool and rather complex kernels. Just the right thing to have * something a bit more complicated and interesting than unit tests. 
* * The code here is just to wrap the common code used by all the kernels (to run * the code and assert its correctness) */ #include "utest_helper.hpp" static const int dim = 256; // tricky here 'name' stands for Kernel and Reference // 'file' stands for .cl file name and dst image name static void run_kernel(int w, int h, const char *file, const char *name) { const size_t global[2] = {size_t(w), size_t(h)}; const size_t local[2] = {16, 1}; const size_t sz = w * h * sizeof(char[4]); const float fx = float(w); const float fy = float(h); char kernel_file[256]; char dst_img[256]; char ref_img[256]; snprintf(kernel_file, sizeof(kernel_file), "%s.cl", file); snprintf(dst_img, sizeof(dst_img), "%s.bmp", file); snprintf(ref_img, sizeof(ref_img), "%s_ref.bmp", name); OCL_CALL (cl_kernel_init, kernel_file, name, SOURCE, NULL); OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &fx); OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &fy); OCL_CALL (clSetKernelArg, kernel, 3, sizeof(int), &w); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); OCL_MAP_BUFFER(0); int *dst = (int*) buf_data[0]; /* Save the image (for debug purpose) */ cl_write_bmp(dst, w, h, dst_img); /* Compare with the golden image */ OCL_CHECK_IMAGE(dst, w, h, ref_img); } #define DECL_SHADER_TOY_TEST(W,H,FILE_NAME, KERNEL_NAME) \ static void FILE_NAME(void) { run_kernel(W,H,#FILE_NAME, #KERNEL_NAME); } \ MAKE_UTEST_FROM_FUNCTION(FILE_NAME); DECL_SHADER_TOY_TEST(dim,dim,compiler_clod,compiler_clod); DECL_SHADER_TOY_TEST(dim,dim,compiler_ribbon,compiler_ribbon); DECL_SHADER_TOY_TEST(dim,dim,compiler_nautilus,compiler_nautilus); DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge_no_shadow,compiler_menger_sponge_no_shadow); DECL_SHADER_TOY_TEST(dim,dim,compiler_julia,compiler_julia); DECL_SHADER_TOY_TEST(dim,dim,compiler_julia_no_break,compiler_julia_no_break); // test 
for function calls DECL_SHADER_TOY_TEST(dim,dim,compiler_clod_function_call,compiler_clod); DECL_SHADER_TOY_TEST(dim,dim,compiler_julia_function_call,compiler_julia); // Still issues here for LLVM 3.2 // DECL_SHADER_TOY_TEST(dim,dim,compiler_chocolux,compiler_chocolux); // DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge,compiler_menger_sponge); #undef DECL_SHADER_TOY_TEST Release_v0.3/utests/compiler_shift_right.cpp000066400000000000000000000022011223142177000214350ustar00rootroot00000000000000#include "utest_helper.hpp" typedef unsigned int uint; static void cpu(int global_id, uint *src, int *dst) { dst[global_id] = src[global_id] >> 24; } void compiler_shift_right(void) { const size_t n = 16; uint cpu_src[16]; int cpu_dst[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_shift_right"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((uint*)buf_data[0])[i] = 0x80000000 | rand(); OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_shift_right); Release_v0.3/utests/compiler_short_scatter.cpp000066400000000000000000000010451223142177000220140ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_short_scatter(void) { const size_t n = 128; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_short_scatter"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int16_t), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 
16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int16_t*)buf_data[0])[i] == (int16_t) i); } MAKE_UTEST_FROM_FUNCTION(compiler_short_scatter); Release_v0.3/utests/compiler_smoothstep.cpp000066400000000000000000000027471223142177000213470ustar00rootroot00000000000000#include #include "utest_helper.hpp" float cpu(float e0, float e1, float x) { x = (x - e0) / (e1 - e0); if (x >= 1) x = 1.f; if (x <= 0) x = 0.f; return x * x * (3 - 2 * x); } void compiler_smoothstep(void) { const int n = 32; float src1[n], src2[n], src3[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_smoothstep"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { float a = 0.1f * (rand() & 15) - 0.75f; float b = a + 0.1f * (rand() & 15) + 0.1f; float c = 0.1f * (rand() & 15) - 0.75f; src1[i] = ((float*)buf_data[0])[i] = a; src2[i] = ((float*)buf_data[1])[i] = b; src3[i] = ((float*)buf_data[2])[i] = c; } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); OCL_MAP_BUFFER(3); for (int i = 0; i < n; ++i) { float a = ((float*)buf_data[3])[i]; float b = cpu(src1[i], src2[i], src3[i]); OCL_ASSERT(fabsf(a - b) < 1e-4f); } OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(compiler_smoothstep); Release_v0.3/utests/compiler_step.cpp000066400000000000000000000212121223142177000201010ustar00rootroot00000000000000#include "utest_helper.hpp" #include "string.h" template struct cl_vec { T ptr[((N+1)/2)*2]; //align to 2 elements. 
typedef cl_vec vec_type; cl_vec(void) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); } cl_vec(vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); } vec_type& operator= (vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } template vec_type& operator= (cl_vec & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } bool operator== (vec_type & other) { return !memcmp (this->ptr, other.ptr, sizeof(T) * N); } void step (vec_type & other) { int i = 0; for (; i < N; i++) { T a = ptr[i]; T edge = other.ptr[i]; T f = a < edge ? 0.0 : 1.0; ptr[i] = f; } } void step (float & edge) { int i = 0; for (; i < N; i++) { T a = ptr[i]; T f = a < edge ? 0.0 : 1.0; ptr[i] = f; } } }; template static void cpu (int global_id, cl_vec *edge, cl_vec *src, cl_vec *dst) { cl_vec v = src[global_id]; v.step(edge[global_id]); dst[global_id] = v; } template static void cpu(int global_id, T *edge, T *src, U *dst) { T f = src[global_id]; T e = edge[global_id]; f = f < e ? 0.0 : 1.0; dst[global_id] = (U)f; } template static void cpu (int global_id, float edge, cl_vec *src, cl_vec *dst) { cl_vec v = src[global_id]; v.step(edge); dst[global_id] = v; } template static void cpu(int global_id, float edge, T *src, U *dst) { T f = src[global_id]; f = f < edge ? 
0.0 : 1.0; dst[global_id] = (U)f; } template static void gen_rand_val (cl_vec& vect) { int i = 0; memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2); for (; i < N; i++) { vect.ptr[i] = static_cast(.1f * (rand() & 15) - .75f); } } template static void gen_rand_val (T & val) { val = static_cast(.1f * (rand() & 15) - .75f); } template inline static void print_data (T& val) { if (std::is_unsigned::value) printf(" %u", val); else printf(" %d", val); } inline static void print_data (float& val) { printf(" %f", val); } template static void dump_data (cl_vec* edge, cl_vec* src, cl_vec* dst, int n) { U* val = reinterpret_cast(dst); n = n*((N+1)/2)*2; printf("\nEdge: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[1])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(val[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[2])[i]); } } template static void dump_data (T* edge, T* src, U* dst, int n) { printf("\nedge: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[1])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(dst[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[2])[i]); } } template static void dump_data (float edge, cl_vec* src, cl_vec* dst, int n) { U* val = reinterpret_cast(dst); n = n*((N+1)/2)*2; printf("\nEdge: %f\n", edge); printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(val[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void dump_data (float edge, T* 
src, U* dst, int n) { printf("\nedge: %f\n", edge); printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(dst[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void compiler_step_with_type(void) { const size_t n = 16; T cpu_dst[n], cpu_src[n]; T edge[n]; // Setup buffers OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = n; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); /* Clear the dst buffer to avoid random data. */ OCL_MAP_BUFFER(2); memset(buf_data[2], 0, sizeof(T) * n); OCL_UNMAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(cpu_src[i]); gen_rand_val(edge[i]); } memcpy(buf_data[1], cpu_src, sizeof(T) * n); memcpy(buf_data[0], edge, sizeof(T) * n); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, edge, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(2); //dump_data(edge, cpu_src, cpu_dst, n); OCL_ASSERT(!memcmp(buf_data[2], cpu_dst, sizeof(T) * n)); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(0); } } #define STEP_TEST_TYPE(TYPE) \ static void compiler_step_##TYPE (void) \ { \ OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_step_"#TYPE, SOURCE, NULL); \ compiler_step_with_type(); \ } \ MAKE_UTEST_FROM_FUNCTION(compiler_step_##TYPE); typedef cl_vec float2; typedef cl_vec float3; typedef cl_vec float4; typedef cl_vec float8; typedef cl_vec float16; STEP_TEST_TYPE(float) STEP_TEST_TYPE(float2) STEP_TEST_TYPE(float3) STEP_TEST_TYPE(float4) STEP_TEST_TYPE(float8) 
STEP_TEST_TYPE(float16) template static void compiler_stepf_with_type(void) { const size_t n = 16; T cpu_dst[n], cpu_src[n]; float edge = (float)(.1f * (rand() & 15) - .75f); // Setup buffers OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(float), &edge); OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = n; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); /* Clear the dst buffer to avoid random data. */ OCL_MAP_BUFFER(1); memset(buf_data[1], 0, sizeof(T) * n); OCL_UNMAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(cpu_src[i]); } memcpy(buf_data[0], cpu_src, sizeof(T) * n); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, edge, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); //dump_data(edge, cpu_src, cpu_dst, n); OCL_ASSERT(!memcmp(buf_data[1], cpu_dst, sizeof(T) * n)); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(0); } } #define STEPF_TEST_TYPE(TYPE) \ static void compiler_stepf_##TYPE (void) \ { \ OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_stepf_"#TYPE, SOURCE, NULL); \ compiler_stepf_with_type(); \ } \ MAKE_UTEST_FROM_FUNCTION(compiler_stepf_##TYPE); STEPF_TEST_TYPE(float) STEPF_TEST_TYPE(float2) STEPF_TEST_TYPE(float3) STEPF_TEST_TYPE(float4) STEPF_TEST_TYPE(float8) STEPF_TEST_TYPE(float16) Release_v0.3/utests/compiler_structure_attributes.cpp000066400000000000000000000002731223142177000234400ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_structure_attributes(void) { OCL_CREATE_KERNEL("compiler_structure_attributes"); } MAKE_UTEST_FROM_FUNCTION(compiler_structure_attributes); Release_v0.3/utests/compiler_switch.cpp000066400000000000000000000027671223142177000204450ustar00rootroot00000000000000#include "utest_helper.hpp" static void cpu_compiler_switch(int *dst, int *src, int 
get_global_id0) { switch (get_global_id0) { case 0: dst[get_global_id0] = src[get_global_id0 + 4]; break; case 1: dst[get_global_id0] = src[get_global_id0 + 14]; break; case 2: dst[get_global_id0] = src[get_global_id0 + 13]; break; case 6: dst[get_global_id0] = src[get_global_id0 + 11]; break; case 7: dst[get_global_id0] = src[get_global_id0 + 10]; break; case 10: dst[get_global_id0] = src[get_global_id0 + 9]; break; case 12: dst[get_global_id0] = src[get_global_id0 + 6]; break; default: dst[get_global_id0] = src[get_global_id0 + 8]; break; } } static void compiler_switch(void) { const size_t n = 32; int cpu_dst[32], cpu_src[32]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_switch"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 32; ++i) cpu_src[i] = ((int32_t*)buf_data[1])[i] = i; OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < 16; ++i) cpu_compiler_switch(cpu_dst, cpu_src, i); for (int i = 0; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[0])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_switch) Release_v0.3/utests/compiler_type_casting.cpp000066400000000000000000000002441223142177000216210ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_type_casting(void) { OCL_CREATE_KERNEL("compiler_type_casting"); } MAKE_UTEST_FROM_FUNCTION(compiler_type_casting); Release_v0.3/utests/compiler_uint16_copy.cpp000066400000000000000000000017741223142177000213210ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_uint16_copy(void) { const size_t n = 128; // Setup kernel and buffers. 
Note that uint16 is aligned on 16 bytes // according to the OCL specificatio OCL_CREATE_KERNEL("compiler_uint16_copy"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[16]) * n); for (uint32_t i = 0; i < n; ++i) for (uint32_t j = 0; j < 16; ++j) ((uint32_t*)buf_data[0])[16*i+j] = 16*i+j; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[16]), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[16]), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16*n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]); } MAKE_UTEST_FROM_FUNCTION(compiler_uint16_copy); Release_v0.3/utests/compiler_uint2_copy.cpp000066400000000000000000000015601223142177000212250ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_uint2_copy(void) { const size_t n = 128; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_uint2_copy"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[2]) * n); for (uint32_t i = 0; i < 2*n; ++i) ((uint32_t*)buf_data[0])[i] = i; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[2]), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[2]), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 2*n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]); } MAKE_UTEST_FROM_FUNCTION(compiler_uint2_copy); Release_v0.3/utests/compiler_uint3_copy.cpp000066400000000000000000000023371223142177000212310ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_uint3_copy(void) { const size_t n 
= 128; // Setup kernel and buffers. Note that uint3 is aligned on 16 bytes // according to the OCL specification OCL_CREATE_KERNEL("compiler_uint3_copy"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[4]) * n); for (uint32_t i = 0; i < n; ++i) { ((uint32_t*)buf_data[0])[4*i+0] = 3*i+0; ((uint32_t*)buf_data[0])[4*i+1] = 3*i+1; ((uint32_t*)buf_data[0])[4*i+2] = 3*i+2; } OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[4]), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[4]), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(((uint32_t*)buf_data[0])[4*i+0] == ((uint32_t*)buf_data[1])[4*i+0]); OCL_ASSERT(((uint32_t*)buf_data[0])[4*i+1] == ((uint32_t*)buf_data[1])[4*i+1]); OCL_ASSERT(((uint32_t*)buf_data[0])[4*i+2] == ((uint32_t*)buf_data[1])[4*i+2]); } } MAKE_UTEST_FROM_FUNCTION(compiler_uint3_copy); Release_v0.3/utests/compiler_uint3_unaligned_copy.cpp000066400000000000000000000023771223142177000232630ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_uint3_unaligned_copy(void) { const size_t n = 128; // Setup kernel and buffers. 
Note that uint3 is aligned on 16 bytes // according to the OCL specification OCL_CREATE_KERNEL("compiler_uint3_unaligned_copy"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[4]) * n); for (uint32_t i = 0; i < n; ++i) { ((uint32_t*)buf_data[0])[3*i+0] = 3*i+0; ((uint32_t*)buf_data[0])[3*i+1] = 3*i+1; ((uint32_t*)buf_data[0])[3*i+2] = 3*i+2; } OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[4]), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[4]), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+0] == ((uint32_t*)buf_data[1])[3*i+0]); OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+1] == ((uint32_t*)buf_data[1])[3*i+1]); OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+2] == ((uint32_t*)buf_data[1])[3*i+2]); } } MAKE_UTEST_FROM_FUNCTION(compiler_uint3_unaligned_copy); Release_v0.3/utests/compiler_uint8_copy.cpp000066400000000000000000000017621223142177000212370ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_uint8_copy(void) { const size_t n = 128; // Setup kernel and buffers. 
Note that uint8 is aligned on 16 bytes // according to the OCL specification OCL_CREATE_KERNEL("compiler_uint8_copy"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[8]) * n); for (uint32_t i = 0; i < n; ++i) for (uint32_t j = 0; j < 8; ++j) ((uint32_t*)buf_data[0])[8*i+j] = 8*i+j; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[8]), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[8]), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8*n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == ((uint32_t*)buf_data[1])[i]); } MAKE_UTEST_FROM_FUNCTION(compiler_uint8_copy); Release_v0.3/utests/compiler_unstructured_branch0.cpp000066400000000000000000000031171223142177000232760ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_unstructured_branch0(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_unstructured_branch0"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); for (uint32_t i = 16; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1); // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); 
for (uint32_t i = 0; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1); // Third control flow for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); for (uint32_t i = 8; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1); } MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch0); Release_v0.3/utests/compiler_unstructured_branch1.cpp000066400000000000000000000030731223142177000233000ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_unstructured_branch1(void) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_unstructured_branch1"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[1])[i] == 3); // Third control flow for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); for (uint32_t i = 8; i < n; 
++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 3); } MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch1); Release_v0.3/utests/compiler_unstructured_branch2.cpp000066400000000000000000000040661223142177000233040ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_unstructured_branch2(void) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_unstructured_branch2"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 12); // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == -6); // Third control flow for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 12); for (uint32_t i = 8; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == -6); // Fourth control flow for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = 1; for (uint32_t i = 4; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); 
for (uint32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 12); for (uint32_t i = 8; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == -6); } MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch2); Release_v0.3/utests/compiler_unstructured_branch3.cpp000066400000000000000000000031451223142177000233020ustar00rootroot00000000000000#include "utest_helper.hpp" static void compiler_unstructured_branch3(void) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_unstructured_branch3"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = 0; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[1])[i] == 3); // Third control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = 0; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); for (uint32_t i = 8; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 3); } MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch3); 
Release_v0.3/utests/compiler_upsample_int.cpp000066400000000000000000000017271223142177000216370ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_upsample_int(void) { const int n = 32; short src1[n]; unsigned short src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_upsample_int"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { src1[i] = ((short*)buf_data[0])[i] = rand(); src2[i] = ((short*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[2])[i] == (int)((src1[i] << 16) | src2[i])); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_upsample_int); Release_v0.3/utests/compiler_upsample_long.cpp000066400000000000000000000020021223142177000217670ustar00rootroot00000000000000#include #include "utest_helper.hpp" void compiler_upsample_long(void) { const int n = 32; int src1[n]; unsigned int src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_upsample_long"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(unsigned int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((unsigned int*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) 
OCL_ASSERT(((int64_t*)buf_data[2])[i] == (((int64_t)(src1[i]) << 32) | src2[i])); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_upsample_long); Release_v0.3/utests/compiler_vect_compare.cpp000066400000000000000000000022141223142177000215760ustar00rootroot00000000000000#include "utest_helper.hpp" typedef struct { int x; int y; int z; int w; } int4; void compiler_vect_compare(void) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_vect_compare"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int4), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int4), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) { ((int4*)buf_data[0])[i].x = i & 0x1; ((int4*)buf_data[0])[i].y = i & 0x2; ((int4*)buf_data[0])[i].z = i & 0x4; ((int4*)buf_data[0])[i].w = i & 0x8; } OCL_UNMAP_BUFFER(0); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) { OCL_ASSERT(((int4*)buf_data[1])[i].x == (int)((i&0x1)?0xffffffff:0)); OCL_ASSERT(((int4*)buf_data[1])[i].y == (int)((i&0x2)?0xffffffff:0)); OCL_ASSERT(((int4*)buf_data[1])[i].z == (int)((i&0x4)?0xffffffff:0)); OCL_ASSERT(((int4*)buf_data[1])[i].w == (int)((i&0x8)?0xffffffff:0)); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_vect_compare); Release_v0.3/utests/compiler_vector_inc.cpp000066400000000000000000000017201223142177000212630ustar00rootroot00000000000000#include #include #include #include "utest_helper.hpp" void compiler_vector_inc(void) { const int n = 64; char dst[n]; char src[n]; OCL_CREATE_KERNEL("compiler_vector_inc"); OCL_CREATE_BUFFER(buf[0], 0, n, NULL); OCL_CREATE_BUFFER(buf[1], 0, n, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n / 2; locals[0] = 16; for (int i = 0; i < n; ++i) { dst[i] = i; src[i] = (i / 2) % 4; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], dst, n); 
memcpy(buf_data[1], src, n); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); char *dest = ((char *)buf_data[0]); for (int i=0; i template static void compiler_vector_load_store(int elemNum, const char *kernelName) { const size_t n = elemNum * 256; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_vector_load_store", kernelName); buf_data[0] = (T*) malloc(sizeof(T) * n); for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[0])[i] = i; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n / elemNum; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) { int shift = ((i % elemNum) + 1); if (strstr(kernelName, "double") == NULL) OCL_ASSERT(((T*)buf_data[1])[i] == (T)(((T*)buf_data[0])[i] + shift)); else OCL_ASSERT((((T*)buf_data[1])[i] - ((T)((T*)buf_data[0])[i] + shift)) < 1e-5); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } #define compiler_vector_load_store(type, n, kernel_type) \ static void compiler_vector_ ##kernel_type ##n ##_load_store(void)\ {\ compiler_vector_load_store(n, "test_" #kernel_type #n);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_vector_ ## kernel_type ##n ##_load_store); #define test_all_vector(type, kernel_type) \ compiler_vector_load_store(type, 2, kernel_type) \ compiler_vector_load_store(type, 3, kernel_type) \ compiler_vector_load_store(type, 4, kernel_type) \ compiler_vector_load_store(type, 8, kernel_type) \ compiler_vector_load_store(type, 16, kernel_type) test_all_vector(int8_t, char) test_all_vector(uint8_t, uchar) test_all_vector(int16_t, short) test_all_vector(uint16_t, ushort) test_all_vector(int32_t, int) test_all_vector(uint32_t, uint) test_all_vector(float, float) test_all_vector(double, double) 
test_all_vector(int64_t, long) test_all_vector(uint64_t, ulong) Release_v0.3/utests/compiler_volatile.cpp000066400000000000000000000002641223142177000207510ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_volatile(void) { // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_volatile"); } MAKE_UTEST_FROM_FUNCTION(compiler_volatile); Release_v0.3/utests/compiler_workitem_builtin.cpp000066400000000000000000000002571223142177000225230ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_workitem_builtin(void) { OCL_CREATE_KERNEL("compiler_workitem_builtin"); } MAKE_UTEST_FROM_FUNCTION(compiler_workitem_builtin); Release_v0.3/utests/compiler_write_only.cpp000066400000000000000000000024211223142177000213220ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "utest_helper.hpp" static void compiler_write_only(void) { const size_t n = 2048; // Setup kernel and buffers OCL_CREATE_KERNEL("test_write_only"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == i); } MAKE_UTEST_FROM_FUNCTION(compiler_write_only); Release_v0.3/utests/compiler_write_only_bytes.cpp000066400000000000000000000010221223142177000225240ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_write_only_bytes(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_write_only_bytes"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint8_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint8_t*)buf_data[0])[i] == 2); } MAKE_UTEST_FROM_FUNCTION(compiler_write_only_bytes); Release_v0.3/utests/compiler_write_only_shorts.cpp000066400000000000000000000010301223142177000227170ustar00rootroot00000000000000#include "utest_helper.hpp" void compiler_write_only_shorts(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_write_only_shorts"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint16_t*)buf_data[0])[i] == 2); } MAKE_UTEST_FROM_FUNCTION(compiler_write_only_shorts); Release_v0.3/utests/enqueue_copy_buf.cpp000066400000000000000000000032051223142177000205730ustar00rootroot00000000000000#include "utest_helper.hpp" void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb) { 
unsigned int i; OCL_MAP_BUFFER(0); for (i=0; i < sz; i++) { ((char*)buf_data[0])[i] = (rand() & 63); } OCL_UNMAP_BUFFER(0); if (src_off + cb > sz || dst_off + cb > sz) { /* Expect Error. */ OCL_ASSERT(clEnqueueCopyBuffer(queue, buf[0], buf[1], src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); return; } OCL_ASSERT(!clEnqueueCopyBuffer(queue, buf[0], buf[1], src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); #if 0 printf("\n########### Src buffer: \n"); for (i = 0; i < cb; ++i) printf(" %2.2u", ((unsigned char*)buf_data[0])[i + src_off]); printf("\n########### dst buffer: \n"); for (i = 0; i < cb; ++i) printf(" %2.2u", ((unsigned char*)buf_data[1])[i + dst_off]); #endif // Check results for (i = 0; i < cb; ++i) { if (((char*)buf_data[0])[i + src_off] != ((char*)buf_data[1])[i + dst_off]) { printf ("different index is %d\n", i); OCL_ASSERT(0); } } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } void enqueue_copy_buf(void) { size_t i; size_t j; const size_t sz = 1024; OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL); OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(char), NULL); for (i=0; i #include #include #include #include #include #include "utest_helper.hpp" using namespace std; /* ***************************************************** * * This file to test all the API like: clGetXXXXInfo * * ***************************************************** */ #define NO_STANDARD_REF 0xFFFFF template struct Info_Result { T ret; T refer; int size; typedef T type_value; void * get_ret(void) { return (void *)&ret; } Info_Result(T other) { refer = other; size = sizeof(T); } bool check_result (void) { //printf("The refer is %d, we get result is %d\n", refer, ret); if (ret != refer && refer != (T)NO_STANDARD_REF) return false; return true; } }; template <> struct Info_Result { char * ret; char * refer; int size; typedef char* type_value; Info_Result(const char *other, int sz): refer(NULL) { size = sz; ret = (char *)malloc(sizeof(char) * sz); if 
(other) { refer = (char *)malloc(sizeof(char) * sz); memcpy(refer, other, sz); } } ~Info_Result(void) { free(refer); free(ret); } void * get_ret(void) { return (void *)ret; } bool check_result (void) { if (refer && ::memcmp(ret, refer, size)) return false; return true; } }; template <> //Used for such as CL_PROGRAM_BINARIES struct Info_Result { char ** ret; char ** refer; int *elt_size; int size; typedef char** type_value; Info_Result(char **other, int *sz, int elt_num) { size = elt_num; ret = (char **)malloc(elt_num * sizeof(char *)); memset(ret, 0, (elt_num * sizeof(char *))); refer = (char **)malloc(elt_num * sizeof(char *)); memset(refer, 0, (elt_num * sizeof(char *))); elt_size = (int *)malloc(elt_num * sizeof(int)); memset(elt_size, 0, (elt_num * sizeof(int))); if (sz) { int i = 0; for (; i < elt_num; i++) { elt_size[i] = sz[i]; ret[i] = (char *)malloc(sz[i] * sizeof(char)); if (other[i] && elt_size[i] > 0) { refer[i] = (char *)malloc(sz[i] * sizeof(char)); memcpy(&refer[i], &other[i], sz[i]); } else refer[i] = NULL; } } } ~Info_Result(void) { int i = 0; for (; i < size; i++) { if (refer[i]) free(refer[i]); free(ret[i]); } free(ret); free(refer); free(elt_size); } void * get_ret(void) { return (void *)ret; } bool check_result (void) { int i = 0; for (; i < size; i++) { if (refer[i] && ::memcmp(ret[i], refer[i], elt_size[i])) return false; } return true; } }; template struct Traits { static bool Is_Same(void) { return false; }; }; template struct Traits { static bool Is_Same(void) { return true; }; }; template Info_Result* cast_as(void *info) { Info_Result* ret; ret = reinterpret_cast*>(info); OCL_ASSERT((Traits::type_value>::Is_Same())); return ret; } #define CALL_INFO_AND_RET(TYPE, FUNC, ...) 
\ do { \ cl_int ret; \ size_t ret_size; \ \ Info_Result* info = cast_as(x.second); \ ret = FUNC (__VA_ARGS__, x.first, \ info->size, info->get_ret(), &ret_size); \ OCL_ASSERT((!ret)); \ OCL_ASSERT((info->check_result())); \ delete info; \ } while(0) /* ***************************************************** * * clGetProgramInfo * * ***************************************************** */ #define CALL_PROGINFO_AND_RET(TYPE) CALL_INFO_AND_RET(TYPE, clGetProgramInfo, program) void get_program_info(void) { map maps; int expect_value; char * expect_source; int sz; char *ker_path = (char *)malloc(4096 * sizeof(char)); const char *kiss_path = getenv("OCL_KERNEL_PATH"); string line; string source_code; sprintf(ker_path, "%s/%s", kiss_path, "compiler_if_else.cl"); ifstream in(ker_path); while (getline(in,line)) { source_code = (source_code == "") ? source_code + line : source_code + "\n" + line; } free(ker_path); //cout<< source_code; source_code = source_code + "\n"; expect_source = (char *)source_code.c_str(); OCL_CREATE_KERNEL("compiler_if_else"); /* First test for clGetProgramInfo. We just have 1 devices now */ expect_value = 2;//One program, one kernel. maps.insert(make_pair(CL_PROGRAM_REFERENCE_COUNT, (void *)(new Info_Result<>(((cl_uint)expect_value))))); maps.insert(make_pair(CL_PROGRAM_CONTEXT, (void *)(new Info_Result(ctx)))); expect_value = 1; maps.insert(make_pair(CL_PROGRAM_NUM_DEVICES, (void *)(new Info_Result<>(((cl_uint)expect_value))))); maps.insert(make_pair(CL_PROGRAM_DEVICES, (void *)(new Info_Result(device)))); sz = (strlen(expect_source) + 1); maps.insert(make_pair(CL_PROGRAM_SOURCE, (void *)(new Info_Result(expect_source, sz)))); expect_value = NO_STANDARD_REF; maps.insert(make_pair(CL_PROGRAM_BINARY_SIZES, (void *)(new Info_Result((size_t)expect_value)))); sz = 4096; //big enough? 
expect_source = NULL; maps.insert(make_pair(CL_PROGRAM_BINARIES, (void *)(new Info_Result(&expect_source, &sz, 1)))); std::for_each(maps.begin(), maps.end(), [](pair x) { switch (x.first) { case CL_PROGRAM_REFERENCE_COUNT: case CL_PROGRAM_NUM_DEVICES: CALL_PROGINFO_AND_RET(cl_uint); break; case CL_PROGRAM_CONTEXT: CALL_PROGINFO_AND_RET(cl_context); break; case CL_PROGRAM_DEVICES: CALL_PROGINFO_AND_RET(cl_device_id); break; case CL_PROGRAM_SOURCE: CALL_PROGINFO_AND_RET(char *); break; case CL_PROGRAM_BINARY_SIZES: CALL_PROGINFO_AND_RET(size_t); break; case CL_PROGRAM_BINARIES: CALL_PROGINFO_AND_RET(char **); break; default: break; } }); } MAKE_UTEST_FROM_FUNCTION(get_program_info); /* ***************************************************** * * clGetCommandQueueInfo * * ***************************************************** */ #define CALL_QUEUEINFO_AND_RET(TYPE) CALL_INFO_AND_RET(TYPE, clGetCommandQueueInfo, queue) void get_queue_info(void) { /* use the compiler_fabs case to test us. */ const size_t n = 16; map maps; int expect_ref; cl_command_queue_properties prop; OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_KERNEL("compiler_fabs"); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); /* Do our test.*/ maps.insert(make_pair(CL_QUEUE_CONTEXT, (void *)(new Info_Result(ctx)))); maps.insert(make_pair(CL_QUEUE_DEVICE, (void *)(new Info_Result(device)))); expect_ref = 1; maps.insert(make_pair(CL_QUEUE_REFERENCE_COUNT, (void *)(new Info_Result<>(((cl_uint)expect_ref))))); prop = 0; maps.insert(make_pair(CL_QUEUE_PROPERTIES, (void *)(new Info_Result( ((cl_command_queue_properties)prop))))); std::for_each(maps.begin(), maps.end(), [](pair x) { switch (x.first) { 
case CL_QUEUE_CONTEXT: CALL_QUEUEINFO_AND_RET(cl_context); break; case CL_QUEUE_DEVICE: CALL_QUEUEINFO_AND_RET(cl_device_id); break; case CL_QUEUE_REFERENCE_COUNT: CALL_QUEUEINFO_AND_RET(cl_uint); break; case CL_QUEUE_PROPERTIES: CALL_QUEUEINFO_AND_RET(cl_command_queue_properties); break; default: break; } }); } MAKE_UTEST_FROM_FUNCTION(get_queue_info); /* ***************************************************** * * clGetProgramBuildInfo * * ***************************************************** */ #define CALL_PROG_BUILD_INFO_AND_RET(TYPE) CALL_INFO_AND_RET(TYPE, \ clGetProgramBuildInfo, program, device) void get_program_build_info(void) { map maps; cl_build_status expect_status; char build_opt[] = "-emit-llvm"; char log[] = ""; int sz; OCL_CALL (cl_kernel_init, "compiler_if_else.cl", "compiler_if_else", SOURCE, build_opt); /* Do our test.*/ expect_status = CL_BUILD_SUCCESS; maps.insert(make_pair(CL_PROGRAM_BUILD_STATUS, (void *)(new Info_Result(expect_status)))); sz = strlen(build_opt) + 1; maps.insert(make_pair(CL_PROGRAM_BUILD_OPTIONS, (void *)(new Info_Result(build_opt, sz)))); sz = strlen(log) + 1; maps.insert(make_pair(CL_PROGRAM_BUILD_LOG, /* not supported now, just "" */ (void *)(new Info_Result(log, sz)))); std::for_each(maps.begin(), maps.end(), [](pair x) { switch (x.first) { case CL_PROGRAM_BUILD_STATUS: CALL_PROG_BUILD_INFO_AND_RET(cl_build_status); break; case CL_PROGRAM_BUILD_OPTIONS: CALL_PROG_BUILD_INFO_AND_RET(char *); break; case CL_PROGRAM_BUILD_LOG: CALL_PROG_BUILD_INFO_AND_RET(char *); break; default: break; } }); } MAKE_UTEST_FROM_FUNCTION(get_program_build_info); /* ***************************************************** * * clGetContextInfo * * ***************************************************** */ #define CALL_CONTEXTINFO_AND_RET(TYPE) CALL_INFO_AND_RET(TYPE, clGetContextInfo, ctx) void get_context_info(void) { /* use the compiler_fabs case to test us. 
*/ const size_t n = 16; map maps; int expect_ref; OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_KERNEL("compiler_fabs"); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); /* Do our test.*/ expect_ref = 1; maps.insert(make_pair(CL_CONTEXT_NUM_DEVICES, (void *)(new Info_Result(expect_ref)))); maps.insert(make_pair(CL_CONTEXT_DEVICES, (void *)(new Info_Result(device)))); // reference count seems depends on the implementation expect_ref = NO_STANDARD_REF; maps.insert(make_pair(CL_CONTEXT_REFERENCE_COUNT, (void *)(new Info_Result<>(((cl_uint)expect_ref))))); maps.insert(make_pair(CL_CONTEXT_PROPERTIES, (void *)(new Info_Result( (const char*)NULL, 100*sizeof(cl_context_properties))))); std::for_each(maps.begin(), maps.end(), [](pair x) { switch (x.first) { case CL_CONTEXT_NUM_DEVICES: CALL_CONTEXTINFO_AND_RET(cl_uint); break; case CL_CONTEXT_DEVICES: CALL_CONTEXTINFO_AND_RET(cl_device_id); break; case CL_CONTEXT_REFERENCE_COUNT: CALL_CONTEXTINFO_AND_RET(cl_uint); break; case CL_CONTEXT_PROPERTIES: CALL_CONTEXTINFO_AND_RET(char*); break; default: break; } }); } MAKE_UTEST_FROM_FUNCTION(get_context_info); /* ***************************************************** * * clGetKernelInfo * * ***************************************************** */ #define CALL_KERNELINFO_AND_RET(TYPE) CALL_INFO_AND_RET(TYPE, clGetKernelInfo, kernel) void get_kernel_info(void) { /* use the compiler_fabs case to test us. 
*/ const size_t n = 16; map maps; int expect_ref; OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_KERNEL("compiler_fabs"); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); // Run the kernel on GPU maps.insert(make_pair(CL_KERNEL_PROGRAM, (void *)(new Info_Result(program)))); maps.insert(make_pair(CL_KERNEL_CONTEXT, (void *)(new Info_Result(ctx)))); // reference count seems depends on the implementation expect_ref = NO_STANDARD_REF; maps.insert(make_pair(CL_KERNEL_REFERENCE_COUNT, (void *)(new Info_Result<>(((cl_uint)expect_ref))))); expect_ref = 2; maps.insert(make_pair(CL_KERNEL_NUM_ARGS, (void *)(new Info_Result(expect_ref)))); const char * expected_name = "compiler_fabs"; maps.insert(make_pair(CL_KERNEL_FUNCTION_NAME, (void *)(new Info_Result(expected_name, strlen(expected_name)+1)))); std::for_each(maps.begin(), maps.end(), [](pair x) { switch (x.first) { case CL_KERNEL_PROGRAM: CALL_KERNELINFO_AND_RET(cl_program); break; case CL_KERNEL_CONTEXT: CALL_KERNELINFO_AND_RET(cl_context); break; case CL_KERNEL_REFERENCE_COUNT: CALL_KERNELINFO_AND_RET(cl_uint); break; case CL_KERNEL_NUM_ARGS: CALL_KERNELINFO_AND_RET(cl_uint); break; case CL_KERNEL_FUNCTION_NAME: CALL_KERNELINFO_AND_RET(char*); break; default: break; } }); } MAKE_UTEST_FROM_FUNCTION(get_kernel_info); /* ***************************************************** * * clGetImageInfo * * ***************************************************** */ void get_image_info(void) { const size_t w = 512; const size_t h = 512; cl_image_format format; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; OCL_CREATE_IMAGE2D(buf[0], 0, &format, w, h, 0, NULL); cl_mem image = buf[0]; cl_image_format ret_format; OCL_CALL(clGetImageInfo, image, CL_IMAGE_FORMAT, sizeof(ret_format), &ret_format, NULL); OCL_ASSERT(format.image_channel_order == ret_format.image_channel_order); 
OCL_ASSERT(format.image_channel_data_type == ret_format.image_channel_data_type); size_t element_size; OCL_CALL(clGetImageInfo, image, CL_IMAGE_ELEMENT_SIZE, sizeof(element_size), &element_size, NULL); OCL_ASSERT(element_size == 4); size_t row_pitch; OCL_CALL(clGetImageInfo, image, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL); OCL_ASSERT(row_pitch == 4 * w); size_t slice_pitch; OCL_CALL(clGetImageInfo, image, CL_IMAGE_SLICE_PITCH, sizeof(slice_pitch), &slice_pitch, NULL); OCL_ASSERT(slice_pitch == 0); size_t width; OCL_CALL(clGetImageInfo, image, CL_IMAGE_WIDTH, sizeof(width), &width, NULL); OCL_ASSERT(width == w); size_t height; OCL_CALL(clGetImageInfo, image, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL); OCL_ASSERT(height == h); size_t depth; OCL_CALL(clGetImageInfo, image, CL_IMAGE_DEPTH, sizeof(depth), &depth, NULL); OCL_ASSERT(depth == 1); } MAKE_UTEST_FROM_FUNCTION(get_image_info); /* ***************************************************** * * clGetMemObjectInfo * * ***************************************************** */ #define CALL_GETMEMINFO_AND_RET(TYPE) CALL_INFO_AND_RET(TYPE, clGetMemObjectInfo, (buf[0])) void get_mem_info(void) { map maps; int expect_ref; OCL_CREATE_BUFFER(buf[0], 0, 64, NULL); void * map_ptr = clEnqueueMapBuffer(queue, buf[0], 1, CL_MAP_READ, 0, 64, 0, NULL, NULL, NULL); expect_ref = CL_MEM_OBJECT_BUFFER; maps.insert(make_pair(CL_MEM_TYPE, (void *)(new Info_Result((cl_mem_object_type)expect_ref)))); expect_ref = 0; maps.insert(make_pair(CL_MEM_FLAGS, (void *)(new Info_Result(expect_ref)))); expect_ref = 64; maps.insert(make_pair(CL_MEM_SIZE, (void *)(new Info_Result(((size_t)expect_ref))))); expect_ref = 0; maps.insert(make_pair(CL_MEM_HOST_PTR, (void *)(new Info_Result(((size_t)expect_ref))))); expect_ref = 1; maps.insert(make_pair(CL_MEM_MAP_COUNT, (void *)(new Info_Result(((cl_uint)expect_ref))))); expect_ref = 1; maps.insert(make_pair(CL_MEM_REFERENCE_COUNT, (void *)(new Info_Result(((cl_uint)expect_ref))))); 
maps.insert(make_pair(CL_MEM_CONTEXT, (void *)(new Info_Result(((cl_context)ctx))))); std::for_each(maps.begin(), maps.end(), [](pair x) { switch (x.first) { case CL_MEM_TYPE: CALL_GETMEMINFO_AND_RET(cl_mem_object_type); break; case CL_MEM_FLAGS: CALL_GETMEMINFO_AND_RET(cl_mem_flags); break; case CL_MEM_SIZE: CALL_GETMEMINFO_AND_RET(size_t); break; case CL_MEM_HOST_PTR: CALL_GETMEMINFO_AND_RET(size_t); break; case CL_MEM_MAP_COUNT: CALL_GETMEMINFO_AND_RET(cl_uint); break; case CL_MEM_REFERENCE_COUNT: CALL_GETMEMINFO_AND_RET(cl_uint); break; case CL_MEM_CONTEXT: CALL_GETMEMINFO_AND_RET(cl_context); break; default: break; } }); clEnqueueUnmapMemObject(queue, buf[0], map_ptr, 0, NULL, NULL); } MAKE_UTEST_FROM_FUNCTION(get_mem_info); Release_v0.3/utests/load_program_from_bin.cpp000066400000000000000000000044501223142177000215620ustar00rootroot00000000000000#include "utest_helper.hpp" #include "utest_file_map.hpp" #include #include using namespace std; static void cpu(int global_id, float *src, float *dst) { dst[global_id] = ceilf(src[global_id]); } static void test_load_program_from_bin(void) { const size_t n = 16; float cpu_dst[16], cpu_src[16]; cl_int status; cl_int binary_status; char *ker_path = NULL; cl_file_map_t *fm = cl_file_map_new(); ker_path = cl_do_kiss_path("compiler_ceil.bin", device); OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS); const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm); const size_t sz = cl_file_map_size(fm); program = clCreateProgramWithBinary(ctx, 1, &device, &sz, &src, &binary_status, &status); OCL_ASSERT(program && status == CL_SUCCESS); /* OCL requires to build the program even if it is created from a binary */ OCL_ASSERT(clBuildProgram(program, 1, &device, NULL, NULL, NULL) == CL_SUCCESS); kernel = clCreateKernel(program, "compiler_ceil", &status); OCL_ASSERT(status == CL_SUCCESS); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); 
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); #if 0 printf("#### GPU:\n"); for (int32_t i = 0; i < (int32_t) n; ++i) printf(" %f", ((float *)buf_data[1])[i]); printf("\n#### CPU:\n"); for (int32_t i = 0; i < (int32_t) n; ++i) printf(" %f", cpu_dst[i]); printf("\n"); #endif for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(test_load_program_from_bin); Release_v0.3/utests/my_test.cpp000066400000000000000000000044761223142177000167350ustar00rootroot00000000000000#include "utest_helper.hpp" struct seg { unsigned int end, color, offset; seg(int e, int c):end(e), color(c) {} }; typedef struct seg seg; typedef struct { std::vector segs; } rle_data; struct rle_image { int width, height; std::vector data; rle_image(int w, int h):width(w), height(h) {} }; typedef struct rle_image rle_image; static void read_data(const char *filename, rle_image &image) { FILE *fp; char line[4096]; int i; fp = fopen(filename, "r"); for (i = 0; i < image.height; i++) { char *nptr = line, *endptr; rle_data d; int start = 0; if (fgets(line, sizeof(line), fp) == NULL) break; for (;;) { int len = strtol(nptr, &endptr, 10); nptr = endptr; int color = strtol(nptr, &endptr, 10); nptr = endptr; seg s(start + len, color); d.segs.push_back(s); if (*endptr == '\n' || *endptr == 0) break; start += len; } image.data.push_back(d); } fclose(fp); } static void prepare_rle_buffer(rle_image &image, std::vector &rle_buffer, int *offsets) { int offset = 0; for (int i = 0; i < image.height; 
i++) { unsigned int j; rle_data d = image.data[i]; for (j = 0; j < d.segs.size(); j++) { rle_buffer.push_back(d.segs[j].end); rle_buffer.push_back(d.segs[j].color); } offsets[i] = offset; offset += j; } } static void expand_rle(rle_image &image) { std::vector rle_buffer; int offsets[image.height]; int w = image.width/16; prepare_rle_buffer(image, rle_buffer, offsets); OCL_CREATE_KERNEL("my_test"); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, 2*sizeof(int)*rle_buffer.size(), &rle_buffer[0]); OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, sizeof(int)*image.height, offsets); OCL_CREATE_BUFFER(buf[2], 0, image.width*image.height, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(w), &w); globals[0] = image.height; locals[0] = 16; OCL_NDRANGE(1); #if 1 OCL_MAP_BUFFER(2); for (int i = 0; i < image.height; i++) { for (int j = 0; j < image.width; j++) printf("%d ", ((unsigned char*)buf_data[2])[i*image.width+j]); printf("\n****\n"); } OCL_UNMAP_BUFFER(2); #endif } static void my_test(void) { rle_image image(256, 256); read_data("new_data.txt", image); expand_rle(image); } MAKE_UTEST_FROM_FUNCTION(my_test); Release_v0.3/utests/new_data.txt000066400000000000000000000056001223142177000170560ustar00rootroot000000000000006 5 3 4 37 15 10 2 200 3 156 1 97 200 3 3 2 1 2 10 128 2 124 25 5 5 251 0 256 0 256 0 256 0 256 0 256 0 256 0 256 1 256 2 256 3 256 0 256 0 256 0 256 1 256 2 256 3 256 0 256 0 256 0 256 0 256 0 256 4 256 5 256 6 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 3 100 255 100 155 
56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 0 100 255 56 0 100 253 100 255 56 0 56 0 20 8 180 9 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 
256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 256 0 1 253 5 252 150 168 100 254 150 168 100 254 1 253 5 252 Release_v0.3/utests/runtime_createcontext.cpp000066400000000000000000000004601223142177000216510ustar00rootroot00000000000000#include "utest_helper.hpp" void runtime_createcontextfromtype(void) { cl_int status; if (clCreateContextFromType(NULL, CL_DEVICE_TYPE_GPU, NULL, NULL, &status) == NULL) { OCL_THROW_ERROR("runtime_createcontextfromtype", status); } } MAKE_UTEST_FROM_FUNCTION(runtime_createcontextfromtype); Release_v0.3/utests/runtime_event.cpp000066400000000000000000000034031223142177000201220ustar00rootroot00000000000000#include "utest_helper.hpp" #define BUFFERSIZE 32*1024 void runtime_event(void) { const size_t n = BUFFERSIZE; cl_int cpu_src[BUFFERSIZE]; cl_event ev[3]; cl_int status = 0; cl_int value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_event"); OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL); for(cl_uint i=0; i= CL_SUBMITTED); } buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status == CL_COMPLETE); OCL_FINISH(); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status <= CL_COMPLETE); } for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); } clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clReleaseEvent(ev[i]); } } MAKE_UTEST_FROM_FUNCTION(runtime_event); 
Release_v0.3/utests/runtime_flat_address_space.cpp000066400000000000000000000045351223142177000226160ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */
#include "utest_helper.hpp"

/*
 * Stand-alone test program: runs the "test_write_only" kernel 24 times, each
 * time on a freshly created buffer of n 32-bit elements, and checks that
 * element i of the mapped result equals i.
 *
 * Returns the last status value; a data mismatch terminates the process with
 * exit(-1) instead of returning.
 * NOTE(review): the early "goto error" paths return without releasing the
 * buffers created so far or calling cl_test_destroy() - confirm this is
 * acceptable for a test binary.
 */
int main(int argc, char *argv[])
{
  cl_mem dst[24];
  int *dst_buffer = NULL;
  const size_t n = 32 * 1024 * 1024;
  const size_t global_work_size = n;
  const size_t local_work_size = 16;
  int status = 0;
  if ((status = cl_test_init("test_write_only.cl", "test_write_only", SOURCE)) != 0)
    goto error;
  for (uint32_t j = 0; j < 24; ++j) {
    // Allocate the destination buffer for this pass (one buffer per pass)
    dst[j] = clCreateBuffer(ctx, 0, n * sizeof(uint32_t), NULL, &status);
    if (status != CL_SUCCESS)
      goto error;
    // Bind the buffer as the kernel's only argument
    OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &dst[j]);
    // Run the kernel
    OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
    // Map the buffer and verify that every element holds its own index
    dst_buffer = (int *) clMapBufferIntel(dst[j], &status);
    if (status != CL_SUCCESS)
      goto error;
    for (uint32_t i = 0; i < n; ++i)
      if (dst_buffer[i] != int(i)) {
        fprintf(stderr, "run-time flat address space failed\n");
        exit(-1);
      }
    OCL_CALL (clUnmapBufferIntel, dst[j]);
  }
  // All passes succeeded: release every buffer, then report leaked objects
  for (uint32_t j = 0; j < 24; ++j)
    OCL_CALL (clReleaseMemObject, dst[j]);
  cl_test_destroy();
  printf("%i memory leaks\n", clReportUnfreedIntel());
  assert(clReportUnfreedIntel() == 0);
error:
  return status;
}
Release_v0.3/utests/runtime_null_kernel_arg.cpp000066400000000000000000000011601223142177000221420ustar00rootroot00000000000000#include "utest_helper.hpp"

/*
 * Runs the "null_kernel_arg" kernel with a real buffer as argument 0 and NULL
 * passed for arguments 1 and 2, then checks that element i of the buffer
 * equals i for all n elements.
 */
void runtime_null_kernel_arg(void)
{
  const size_t n = 32;
  // Setup kernel and buffers
  OCL_CREATE_KERNEL("null_kernel_arg");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  // Arguments 1 and 2 are deliberately given a NULL value
  OCL_SET_ARG(1, sizeof(cl_mem), NULL);
  OCL_SET_ARG(2, sizeof(cl_mem), NULL);
  // Run the kernel
  globals[0] = n;
  locals[0] = 16;
  OCL_NDRANGE(1);
  OCL_MAP_BUFFER(0);
  // Check results
  for (uint32_t i = 0; i < n; ++i)
    OCL_ASSERT(((uint32_t*)buf_data[0])[i] == i);
  OCL_UNMAP_BUFFER(0);
}
MAKE_UTEST_FROM_FUNCTION(runtime_null_kernel_arg);
Release_v0.3/utests/utest.cpp000066400000000000000000000055551223142177000164140ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
* * Author: Benjamin Segovia */ /** * \file utest.cpp * \author Benjamin Segovia */ #include "utest.hpp" #include "utest_helper.hpp" #include #include #include #include using namespace std; vector *UTest::utestList = NULL; void releaseUTestList(void) { delete UTest::utestList; } UTest::UTest(Function fn, const char *name, bool haveIssue) : fn(fn), name(name), haveIssue(haveIssue) { if (utestList == NULL) { utestList = new vector; atexit(releaseUTestList); } utestList->push_back(*this); } UTest::UTest(void) : fn(NULL), name(NULL), haveIssue(false) {} static bool strequal(const char *s1, const char *s2) { if (strcmp(s1, s2) == 0) return true; return false; } void UTest::run(const char *name) { if (name == NULL) return; if (utestList == NULL) return; for (size_t i = 0; i < utestList->size(); ++i) { const UTest &utest = (*utestList)[i]; if (utest.name == NULL || utest.fn == NULL ) continue; if (strequal(utest.name, name)) { std::cout << utest.name << ":" << std::endl; (utest.fn)(); std::cout << std::endl; cl_kernel_destroy(); cl_buffer_destroy(); } } } void UTest::runAll(void) { if (utestList == NULL) return; for (size_t i = 0; i < utestList->size(); ++i) { const UTest &utest = (*utestList)[i]; if (utest.fn == NULL) continue; std::cout << utest.name << ":" << std::endl; (utest.fn)(); std::cout << std::endl; cl_kernel_destroy(); cl_buffer_destroy(); } } void UTest::runAllNoIssue(void) { if (utestList == NULL) return; for (size_t i = 0; i < utestList->size(); ++i) { const UTest &utest = (*utestList)[i]; if (utest.fn == NULL || utest.haveIssue) continue; std::cout << utest.name << ":" << std::endl; (utest.fn)(); std::cout << std::endl; cl_kernel_destroy(); cl_buffer_destroy(); } } void UTest::listAllCases() { if (utestList == NULL) return; for (size_t i = 0; i < utestList->size(); ++i) { const UTest &utest = (*utestList)[i]; if (utest.fn == NULL) continue; std::cout << utest.name << std::endl; } } 
Release_v0.3/utests/utest.hpp000066400000000000000000000060721223142177000164140ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file utest.hpp * \author Benjamin Segovia * * Provides all unit test capabilites. It is rather rudimentary but it should * do the job */ #ifndef __UTEST_UTEST_HPP__ #define __UTEST_UTEST_HPP__ #include "utest_exception.hpp" #include #include /*! Quick and dirty unit test system with registration */ struct UTest { /*! A unit test function to run */ typedef void (*Function) (void); /*! Empty test */ UTest(void); /*! Build a new unit test and append it to the unit test list */ UTest(Function fn, const char *name, bool haveIssue = false); /*! Function to execute */ Function fn; /*! Name of the test */ const char *name; /*! Indicate whether current test cases has issue to be fixes */ bool haveIssue; /*! The tests that are registered */ static std::vector *utestList; /*! Run the test with the given name */ static void run(const char *name); /*! Run all the tests without known issue*/ static void runAllNoIssue(void); /*! Run all the tests */ static void runAll(void); /*! List all test cases */ static void listAllCases(void); }; /*! Register a new unit test */ #define UTEST_REGISTER(FN) static const UTest __##FN##__(FN, #FN); /*! 
Turn a function into a unit test */ #define MAKE_UTEST_FROM_FUNCTION(FN) \ static void __ANON__##FN##__(void) { UTEST_EXPECT_SUCCESS(FN()); } \ static const UTest __##FN##__(__ANON__##FN##__, #FN); /*! Register a test case which has issue to be fixed */ #define MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(FN) \ static void __ANON__##FN##__(void) { UTEST_EXPECT_SUCCESS(FN()); } \ static const UTest __##FN##__(__ANON__##FN##__, #FN, true); /*! No assert is expected */ #define UTEST_EXPECT_SUCCESS(EXPR) \ do { \ try { \ EXPR; \ std::cout << " " << #EXPR << " [SUCCESS]" << std::endl; \ } \ catch (Exception e) { \ std::cout << " " << #EXPR << " [FAILED]" << std::endl; \ std::cout << " " << e.what() << std::endl; \ } \ } while (0) #define UTEST_EXPECT_FAILED(EXPR) \ do { \ try { \ EXPR; \ std::cout << " " << #EXPR << " [FAILED]" << std::endl; \ } \ catch (gbe::Exception e) { \ std::cout << " " << #EXPR << " [SUCCESS]" << std::endl; \ } \ } while (0) #endif /* __UTEST_UTEST_HPP__ */ Release_v0.3/utests/utest_assert.cpp000066400000000000000000000026461223142177000177730ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ /** * \file assert.cpp * \author Benjamin Segovia */ #include "utest_assert.hpp" #include "utest_exception.hpp" #include #include void onFailedAssertion(const char *msg, const char *file, const char *fn, int line) { char lineString[256]; sprintf(lineString, "%i", line); assert(msg != NULL && file != NULL && fn != NULL); const std::string str = "Error: " + std::string(msg) + "\n at file " + std::string(file) + ", function " + std::string(fn) + ", line " + std::string(lineString); throw Exception(str); } Release_v0.3/utests/utest_assert.hpp000066400000000000000000000025541223142177000177760ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file assert.hpp * * \author Benjamin Segovia */ #ifndef __OCL_ASSERT_HPP__ #define __OCL_ASSERT_HPP__ /*! To ensure that condition truth. 
Optional message is supported */
/* Called by the macros below when a condition is false; receives the message
 * text plus the file, function and line of the failing assertion. */
void onFailedAssertion(const char *msg, const char *file, const char *fn, int line);

/* Evaluate EXPR once; when it is false, report the failure using the
 * stringified expression as the message. Wrapped in do/while(0) so the macro
 * behaves as a single statement. */
#define OCL_ASSERT(EXPR) \
  do { \
    if (!(EXPR)) \
      onFailedAssertion(#EXPR, __FILE__, __FUNCTION__, __LINE__); \
  } while (0)

/* Same as OCL_ASSERT, but reports the caller-supplied MSG instead of the
 * expression text. */
#define OCL_ASSERTM(EXPR, MSG) \
  do { \
    if (!(EXPR)) \
      onFailedAssertion(MSG, __FILE__, __FUNCTION__, __LINE__); \
  } while (0)

#endif /* __OCL_ASSERT_HPP__ */
Release_v0.3/utests/utest_error.c000066400000000000000000000072701223142177000172610ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
 *
 * Author: Benjamin Segovia
 */
#include "utest_error.h"
#include "CL/cl.h"

/*
 * Table of OpenCL error names, indexed by the NEGATED error code
 * (err_msg[-code]). It is built with designated initializers, so any index
 * that is not named below holds a NULL pointer; look-ups must check both the
 * bound (err_msg_n) and the entry itself.
 */
const char *err_msg[] = {
  [-CL_SUCCESS] = "CL_SUCCESS",
  [-CL_DEVICE_NOT_FOUND] = "CL_DEVICE_NOT_FOUND",
  [-CL_DEVICE_NOT_AVAILABLE] = "CL_DEVICE_NOT_AVAILABLE",
  [-CL_COMPILER_NOT_AVAILABLE] = "CL_COMPILER_NOT_AVAILABLE",
  [-CL_MEM_OBJECT_ALLOCATION_FAILURE] = "CL_MEM_OBJECT_ALLOCATION_FAILURE",
  [-CL_OUT_OF_RESOURCES] = "CL_OUT_OF_RESOURCES",
  [-CL_OUT_OF_HOST_MEMORY] = "CL_OUT_OF_HOST_MEMORY",
  [-CL_PROFILING_INFO_NOT_AVAILABLE] = "CL_PROFILING_INFO_NOT_AVAILABLE",
  [-CL_MEM_COPY_OVERLAP] = "CL_MEM_COPY_OVERLAP",
  [-CL_IMAGE_FORMAT_MISMATCH] = "CL_IMAGE_FORMAT_MISMATCH",
  [-CL_IMAGE_FORMAT_NOT_SUPPORTED] = "CL_IMAGE_FORMAT_NOT_SUPPORTED",
  [-CL_BUILD_PROGRAM_FAILURE] = "CL_BUILD_PROGRAM_FAILURE",
  [-CL_MAP_FAILURE] = "CL_MAP_FAILURE",
  [-CL_MISALIGNED_SUB_BUFFER_OFFSET] = "CL_MISALIGNED_SUB_BUFFER_OFFSET",
  [-CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST] = "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST",
  [-CL_INVALID_VALUE] = "CL_INVALID_VALUE",
  [-CL_INVALID_DEVICE_TYPE] = "CL_INVALID_DEVICE_TYPE",
  [-CL_INVALID_PLATFORM] = "CL_INVALID_PLATFORM",
  [-CL_INVALID_DEVICE] = "CL_INVALID_DEVICE",
  [-CL_INVALID_CONTEXT] = "CL_INVALID_CONTEXT",
  [-CL_INVALID_QUEUE_PROPERTIES] = "CL_INVALID_QUEUE_PROPERTIES",
  [-CL_INVALID_COMMAND_QUEUE] = "CL_INVALID_COMMAND_QUEUE",
  [-CL_INVALID_HOST_PTR] = "CL_INVALID_HOST_PTR",
  [-CL_INVALID_MEM_OBJECT] = "CL_INVALID_MEM_OBJECT",
  [-CL_INVALID_IMAGE_FORMAT_DESCRIPTOR] = "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
  [-CL_INVALID_IMAGE_SIZE] = "CL_INVALID_IMAGE_SIZE",
  [-CL_INVALID_SAMPLER] = "CL_INVALID_SAMPLER",
  [-CL_INVALID_BINARY] = "CL_INVALID_BINARY",
  [-CL_INVALID_BUILD_OPTIONS] = "CL_INVALID_BUILD_OPTIONS",
  [-CL_INVALID_PROGRAM] = "CL_INVALID_PROGRAM",
  [-CL_INVALID_PROGRAM_EXECUTABLE] = "CL_INVALID_PROGRAM_EXECUTABLE",
  [-CL_INVALID_KERNEL_NAME] = "CL_INVALID_KERNEL_NAME",
  [-CL_INVALID_KERNEL_DEFINITION] = "CL_INVALID_KERNEL_DEFINITION",
  [-CL_INVALID_KERNEL] = "CL_INVALID_KERNEL",
  [-CL_INVALID_ARG_INDEX] = "CL_INVALID_ARG_INDEX",
  [-CL_INVALID_ARG_VALUE] = "CL_INVALID_ARG_VALUE",
  [-CL_INVALID_ARG_SIZE] = "CL_INVALID_ARG_SIZE",
  [-CL_INVALID_KERNEL_ARGS] = "CL_INVALID_KERNEL_ARGS",
  [-CL_INVALID_WORK_DIMENSION] = "CL_INVALID_WORK_DIMENSION",
  [-CL_INVALID_WORK_GROUP_SIZE] = "CL_INVALID_WORK_GROUP_SIZE",
  [-CL_INVALID_WORK_ITEM_SIZE] = "CL_INVALID_WORK_ITEM_SIZE",
  [-CL_INVALID_GLOBAL_OFFSET] = "CL_INVALID_GLOBAL_OFFSET",
  [-CL_INVALID_EVENT_WAIT_LIST] = "CL_INVALID_EVENT_WAIT_LIST",
  [-CL_INVALID_EVENT] = "CL_INVALID_EVENT",
  [-CL_INVALID_OPERATION] = "CL_INVALID_OPERATION",
  [-CL_INVALID_GL_OBJECT] = "CL_INVALID_GL_OBJECT",
  [-CL_INVALID_BUFFER_SIZE] = "CL_INVALID_BUFFER_SIZE",
  [-CL_INVALID_MIP_LEVEL] = "CL_INVALID_MIP_LEVEL",
  [-CL_INVALID_GLOBAL_WORK_SIZE] = "CL_INVALID_GLOBAL_WORK_SIZE",
  [-CL_INVALID_PROPERTY] = "CL_INVALID_PROPERTY"
};

/* Number of slots in err_msg (highest designated index + 1), NULL gaps included */
const size_t err_msg_n = sizeof(err_msg) / sizeof(err_msg[0]);
Release_v0.3/utests/utest_error.h000066400000000000000000000016561223142177000172700ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see .
* * Author: Benjamin Segovia */ #ifndef __UTEST_ERROR_H__ #define __UTEST_ERROR_H__ #include extern const char *err_msg[]; extern const size_t err_msg_n; #endif /* __UTEST_ERROR_H__ */ Release_v0.3/utests/utest_exception.hpp000066400000000000000000000027211223142177000204670ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file exception.hpp * * \author Benjamin Segovia */ #ifndef __UTEST_EXCEPTION_HPP__ #define __UTEST_EXCEPTION_HPP__ #include #include /*! 
Exception are only used while using unit tests */ class Exception : public std::exception { public: Exception(const std::string &msg) throw() : msg(msg) {} Exception(const Exception &other) throw() : msg(other.msg) {} ~Exception(void) throw() {} Exception &operator= (const Exception &other) throw() { this->msg = other.msg; return *this; } const char *what(void) const throw() { return msg.c_str(); } private: std::string msg; //!< String message }; #endif /* __UTEST_EXCEPTION_HPP__ */ Release_v0.3/utests/utest_file_map.cpp000066400000000000000000000047371223142177000202510ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "utest_file_map.hpp" #include "CL/cl.h" #include #include #include #include #include #include #include #include int cl_file_map_init(cl_file_map_t *fm) { assert(fm); memset(fm,0,sizeof(*fm)); return CL_SUCCESS; } void cl_file_map_destroy(cl_file_map_t *fm) { if (fm->mapped) { munmap(fm->start, fm->size); fm->start = fm->stop = 0; fm->size = 0; fm->mapped = CL_FALSE; } if(fm->fd) { close(fm->fd); fm->fd = 0; } free(fm->name); memset(fm,0,sizeof(*fm)); } void cl_file_map_delete(cl_file_map_t *fm) { if (fm == NULL) return; cl_file_map_destroy(fm); free(fm); } cl_file_map_t* cl_file_map_new(void) { cl_file_map_t *fm = NULL; if ((fm = (cl_file_map_t *) calloc(1, sizeof(cl_file_map_t))) == NULL) goto error; if (cl_file_map_init(fm) != CL_SUCCESS) goto error; exit: return fm; error: cl_file_map_delete(fm); fm = NULL; goto exit; } int cl_file_map_open(cl_file_map_t *fm, const char *name) { int err = CL_FILE_MAP_SUCCESS; /* Open the file */ fm->fd = open(name, O_RDONLY); if(fm->fd <= 0) { err = CL_FILE_MAP_FILE_NOT_FOUND; goto error; } if ((fm->name = (char*) calloc(strlen(name) + 1, sizeof(char))) == NULL) goto error; sprintf(fm->name, "%s", name); /* Map it */ fm->size = lseek(fm->fd, 0, SEEK_END); lseek(fm->fd, 0, SEEK_SET); fm->start = mmap(0, fm->size, PROT_READ, MAP_SHARED, fm->fd, 0); if(fm->start == NULL) { err = CL_FILE_MAP_FAILED_TO_MMAP; goto error; } fm->stop = ((char *) fm->start) + fm->size; fm->mapped = CL_TRUE; exit: return err; error: cl_file_map_destroy(fm); goto exit; } Release_v0.3/utests/utest_file_map.hpp000066400000000000000000000046241223142177000202510ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file assert.hpp * * \author Benjamin Segovia */ #ifndef __UTEST_FILE_MAP_HPP__ #define __UTEST_FILE_MAP_HPP__ #include "CL/cl.h" #include /* Map a file into memory for direct / cached / simple accesses */ typedef struct cl_file_map { void *start, *stop; /* First character and last one */ size_t size; /* Total size of the file */ int fd; /* Posix file descriptor */ cl_bool mapped; /* Indicate if a file was mapped or not */ char *name; /* File itself */ } cl_file_map_t; /* Report information about an open temptative */ enum { CL_FILE_MAP_SUCCESS = 0, CL_FILE_MAP_FILE_NOT_FOUND = 1, CL_FILE_MAP_FAILED_TO_MMAP = 2 }; /* Allocate and Initialize a file mapper (but do not map any file */ extern cl_file_map_t *cl_file_map_new(void); /* Initialize a file mapper (but do not map any file */ extern int cl_file_map_init(cl_file_map_t *fm); /* Destroy but do not deallocate a file map */ extern void cl_file_map_destroy(cl_file_map_t *fm); /* Destroy and free it */ extern void cl_file_map_delete(cl_file_map_t *fm); /* Open a file and returns the error code */ extern int cl_file_map_open(cl_file_map_t *fm, const char *name); static inline cl_bool cl_file_map_is_mapped(const cl_file_map_t *fm) { return fm->mapped; } static inline const char* cl_file_map_begin(const cl_file_map_t *fm) { return (const char*) fm->start; } static inline const char* cl_file_map_end(const cl_file_map_t *fm) { return (const char*) fm->stop; } static inline size_t cl_file_map_size(const cl_file_map_t *fm) { return fm->size; } #endif /* __UTEST_FILE_MAP_HPP__ */ 
Release_v0.3/utests/utest_helper.cpp000066400000000000000000000432641223142177000177520ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "utest_file_map.hpp" #include "utest_helper.hpp" #include "utest_error.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include #include #include #include #include #define FATAL(...) \ do { \ fprintf(stderr, "error: "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n");\ assert(0); \ exit(-1); \ } while (0) #define FATAL_IF(COND, ...) 
\ do { \ if (COND) FATAL(__VA_ARGS__); \ } while (0) cl_platform_id platform = NULL; cl_device_id device = NULL; cl_context ctx = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_command_queue queue = NULL; cl_mem buf[MAX_BUFFER_N] = {}; void *buf_data[MAX_BUFFER_N] = {}; size_t globals[3] = {}; size_t locals[3] = {}; #ifdef HAS_EGL Display *xDisplay; EGLDisplay eglDisplay; EGLContext eglContext = NULL; EGLSurface eglSurface; Window xWindow; void cl_ocl_destroy_egl_window() { eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); eglDestroyContext(eglDisplay, eglContext); eglDestroySurface(eglDisplay, eglSurface); XDestroyWindow(xDisplay, xWindow); XCloseDisplay(xDisplay); } bool init_egl_window(int width, int height) { XSetWindowAttributes swa; Window win, root; EGLint attr[] = { // some attributes to set up our egl-interface EGL_BUFFER_SIZE, 16, EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT, EGL_NONE }; //// egl-contexts collect all state descriptions needed required for operation EGLint ctxattr[] = { #if 0 EGL_CONTEXT_CLIENT_VERSION, 2, #endif EGL_NONE }; EGLConfig ecfg; EGLint numConfig; eglContext = EGL_NO_CONTEXT; xDisplay = XOpenDisplay(NULL); if (xDisplay == NULL) { fprintf(stderr, "Failed to open DISPLAY.\n"); return false; } root = DefaultRootWindow(xDisplay); swa.event_mask = ExposureMask | PointerMotionMask | KeyPressMask; win = XCreateWindow( xDisplay, root, 0, 0, width, height, 0, CopyFromParent, InputOutput, CopyFromParent, CWEventMask, &swa); xWindow = win; /////// the egl part ////////////////////////////////////////////////////////////////// // egl provides an interface to connect the graphics related functionality of openGL ES // with the windowing interface and functionality of the native operation system (X11 // in our case. 
eglDisplay = eglGetDisplay( (EGLNativeDisplayType) xDisplay ); if ( eglDisplay == EGL_NO_DISPLAY ) { fprintf(stderr, "Got no EGL display.\n"); return false; } eglBindAPI(EGL_OPENGL_API); int m,n; if ( !eglInitialize( eglDisplay, &m, &n ) ) { fprintf(stderr, "Unable to initialize EGL\n"); return false; } if ( !eglChooseConfig( eglDisplay, attr, &ecfg, 1, &numConfig ) ) { fprintf(stderr, "Failed to choose config (eglError: %d)\n", eglGetError()); return false; } if ( numConfig != 1 ) { fprintf(stderr, "Didn't get exactly one config, but %d", numConfig); return false; } eglSurface = eglCreateWindowSurface ( eglDisplay, ecfg, win, NULL ); if ( eglSurface == EGL_NO_SURFACE ) { fprintf(stderr, "Unable to create EGL surface (eglError: %d)\n", eglGetError()); return false; } eglContext = eglCreateContext ( eglDisplay, ecfg, EGL_NO_CONTEXT, ctxattr ); if ( eglContext == EGL_NO_CONTEXT ) { fprintf(stderr, "Unable to create EGL context (eglError: %d)\n", eglGetError()); return false; } //// associate the egl-context with the egl-surface eglMakeCurrent( eglDisplay, eglSurface, eglSurface, eglContext); glClearColor(1.0, 1.0, 1.0, 1.0); glClear(GL_COLOR_BUFFER_BIT); glFinish(); eglSwapBuffers(eglDisplay, eglSurface); return true; } #endif static const char* cl_test_channel_order_string(cl_channel_order order) { switch(order) { #define DECL_ORDER(WHICH) case CL_##WHICH: return "CL_"#WHICH DECL_ORDER(R); DECL_ORDER(A); DECL_ORDER(RG); DECL_ORDER(RA); DECL_ORDER(RGB); DECL_ORDER(RGBA); DECL_ORDER(BGRA); DECL_ORDER(ARGB); DECL_ORDER(INTENSITY); DECL_ORDER(LUMINANCE); DECL_ORDER(Rx); DECL_ORDER(RGx); DECL_ORDER(RGBx); #undef DECL_ORDER default: return "Unsupported image channel order"; }; } static const char* cl_test_channel_type_string(cl_channel_type type) { switch(type) { #define DECL_TYPE(WHICH) case CL_##WHICH: return "CL_"#WHICH DECL_TYPE(SNORM_INT8); DECL_TYPE(SNORM_INT16); DECL_TYPE(UNORM_INT8); DECL_TYPE(UNORM_INT16); DECL_TYPE(UNORM_SHORT_565); DECL_TYPE(UNORM_SHORT_555); 
DECL_TYPE(UNORM_INT_101010); DECL_TYPE(SIGNED_INT8); DECL_TYPE(SIGNED_INT16); DECL_TYPE(SIGNED_INT32); DECL_TYPE(UNSIGNED_INT8); DECL_TYPE(UNSIGNED_INT16); DECL_TYPE(UNSIGNED_INT32); DECL_TYPE(HALF_FLOAT); DECL_TYPE(FLOAT); #undef DECL_TYPE default: return "Unsupported image channel type"; }; } static void clpanic(const char *msg, int rval) { printf("Failed: %s (%d)\n", msg, rval); exit(-1); } char* cl_do_kiss_path(const char *file, cl_device_id device) { cl_int ver; const char *sub_path = NULL; char *ker_path = NULL; const char *kiss_path = getenv("OCL_KERNEL_PATH"); size_t sz = strlen(file); if (device == NULL) sub_path = ""; else { if (clGetGenVersionIntel(device, &ver) != CL_SUCCESS) clpanic("Unable to get Gen version", -1); sub_path = ""; } if (kiss_path == NULL) clpanic("set OCL_KERNEL_PATH. This is where the kiss kernels are", -1); sz += strlen(kiss_path) + strlen(sub_path) + 2; /* +1 for end of string, +1 for '/' */ if ((ker_path = (char*) malloc(sz)) == NULL) clpanic("Allocation failed", -1); sprintf(ker_path, "%s/%s%s", kiss_path, sub_path, file); return ker_path; } int cl_kernel_init(const char *file_name, const char *kernel_name, int format, const char * build_opt) { cl_file_map_t *fm = NULL; char *ker_path = NULL; cl_int status = CL_SUCCESS; /* Load the program and build it */ ker_path = cl_do_kiss_path(file_name, device); if (format == LLVM) program = clCreateProgramWithLLVMIntel(ctx, 1, &device, ker_path, &status); else if (format == SOURCE) { cl_file_map_t *fm = cl_file_map_new(); FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS, "Failed to open file \"%s\" with kernel \"%s\". 
Did you properly set OCL_KERNEL_PATH variable?", file_name, kernel_name); const char *src = cl_file_map_begin(fm); const size_t sz = cl_file_map_size(fm); program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status); cl_file_map_delete(fm); } else FATAL("Not able to create program from binary"); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateProgramWithBinary\n"); goto error; } /* OCL requires to build the program even if it is created from a binary */ OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL); /* Create a kernel from the program */ kernel = clCreateKernel(program, kernel_name, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateKernel\n"); goto error; } exit: free(ker_path); cl_file_map_delete(fm); return status; error: goto exit; } #define GET_PLATFORM_STR_INFO(LOWER_NAME, NAME) \ { \ size_t param_value_size; \ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, 0, 0, ¶m_value_size); \ std::vector param_value(param_value_size); \ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, \ param_value_size, param_value.empty() ? NULL : ¶m_value.front(), \ ¶m_value_size); \ std::string str; \ if (!param_value.empty()) \ str = std::string(¶m_value.front(), param_value_size-1); \ printf("platform_" #LOWER_NAME " \"%s\"\n", str.c_str()); \ } #include #define GET_DEVICE_STR_INFO(LOWER_NAME, NAME) \ std::string LOWER_NAME ##Str; \ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, 0, 0, ¶m_value_size); \ { \ std::vector param_value(param_value_size); \ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, \ param_value_size, param_value.empty() ? 
NULL : ¶m_value.front(), \ ¶m_value_size); \ if (!param_value.empty()) \ LOWER_NAME ##Str = std::string(¶m_value.front(), param_value_size-1); \ } \ printf("device_" #LOWER_NAME " \"%s\"\n", LOWER_NAME ##Str.c_str()); int cl_ocl_init(void) { cl_int status = CL_SUCCESS; cl_uint platform_n; size_t i; #ifdef HAS_EGL bool hasGLExt = false; #endif cl_context_properties *props = NULL; /* Get the platform number */ OCL_CALL (clGetPlatformIDs, 0, NULL, &platform_n); printf("platform number %u\n", platform_n); assert(platform_n >= 1); /* Get a valid platform */ OCL_CALL (clGetPlatformIDs, 1, &platform, &platform_n); GET_PLATFORM_STR_INFO(profile, PROFILE); GET_PLATFORM_STR_INFO(name, NAME); GET_PLATFORM_STR_INFO(vendor, VENDOR); GET_PLATFORM_STR_INFO(version, VERSION); GET_PLATFORM_STR_INFO(extensions, EXTENSIONS); /* Get the device (only GPU device is supported right now) */ OCL_CALL (clGetDeviceIDs, platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); { size_t param_value_size; GET_DEVICE_STR_INFO(profile, PROFILE); GET_DEVICE_STR_INFO(name, NAME); GET_DEVICE_STR_INFO(vendor, VENDOR); GET_DEVICE_STR_INFO(version, VERSION); GET_DEVICE_STR_INFO(extensions, EXTENSIONS); GET_DEVICE_STR_INFO(opencl_c_version, OPENCL_C_VERSION); #ifdef HAS_EGL if (std::strstr(extensionsStr.c_str(), "cl_khr_gl_sharing")) { hasGLExt = true; } #endif } #ifdef HAS_EGL if (hasGLExt) { int i = 0; props = new cl_context_properties[7]; props[i++] = CL_CONTEXT_PLATFORM; props[i++] = (cl_context_properties)platform; if (init_egl_window(EGL_WINDOW_WIDTH, EGL_WINDOW_HEIGHT)) { props[i++] = CL_EGL_DISPLAY_KHR; props[i++] = (cl_context_properties)eglGetCurrentDisplay(); props[i++] = CL_GL_CONTEXT_KHR; props[i++] = (cl_context_properties)eglGetCurrentContext(); } props[i++] = 0; } #endif /* Now create a context */ ctx = clCreateContext(props, 1, &device, NULL, NULL, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateContext\n"); goto error; } /* All image types currently supported by the 
context */ cl_image_format fmt[256]; cl_uint fmt_n; clGetSupportedImageFormats(ctx, 0, CL_MEM_OBJECT_IMAGE2D, 256, fmt, &fmt_n); printf("%u image formats are supported\n", fmt_n); for (i = 0; i < fmt_n; ++i) printf("[%s %s]\n", cl_test_channel_order_string(fmt[i].image_channel_order), cl_test_channel_type_string(fmt[i].image_channel_data_type)); /* We are going to push NDRange kernels here */ queue = clCreateCommandQueue(ctx, device, 0, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateCommandQueue\n"); goto error; } error: if (props) delete props; return status; } int cl_test_init(const char *file_name, const char *kernel_name, int format) { cl_int status = CL_SUCCESS; /* Initialize OCL */ if ((status = cl_ocl_init()) != CL_SUCCESS) goto error; /* Load the kernel */ if ((status = cl_kernel_init(file_name, kernel_name, format, NULL)) != CL_SUCCESS) goto error; error: return status; } void cl_kernel_destroy(void) { if (kernel) clReleaseKernel(kernel); if (program) clReleaseProgram(program); kernel = NULL; program = NULL; } void cl_ocl_destroy(void) { clReleaseCommandQueue(queue); clReleaseContext(ctx); #ifdef HAS_EGL if (eglContext != NULL) { cl_ocl_destroy_egl_window(); eglContext = NULL; } #endif } void cl_test_destroy(void) { cl_kernel_destroy(); cl_ocl_destroy(); printf("%i memory leaks\n", clReportUnfreedIntel()); assert(clReportUnfreedIntel() == 0); } void cl_buffer_destroy(void) { int i; for (i = 0; i < MAX_BUFFER_N; ++i) { if (buf_data[i] != NULL) { clUnmapBufferIntel(buf[i]); buf_data[i] = NULL; } if (buf[i] != NULL) { clReleaseMemObject(buf[i]); buf[i] = NULL; } } } void cl_report_perf_counters(cl_mem perf) { cl_int status = CL_SUCCESS; uint32_t *start = NULL, *end = NULL; uint32_t i; if (perf == NULL) return; start = (uint32_t*) clMapBufferIntel(perf, &status); assert(status == CL_SUCCESS && start != NULL); end = start + 128; printf("BEFORE\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u 0x%8x] ", i, 
start[i]); } printf("\n\n"); printf("AFTER\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u 0x%8x] ", i, end[i]); } printf("\n\n"); printf("DIFF\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u %8i] ", i, end[i] - start[i]); } printf("\n\n"); clUnmapBufferIntel(perf); } struct bmphdr { // 2 bytes of magic here, "BM", total header size is 54 bytes! int filesize; // 4 total file size incl header short as0, as1; // 8 app specific int bmpoffset; // 12 ofset of bmp data int headerbytes; // 16 bytes in header from this point (40 actually) int width; // 20 int height; // 24 short nplanes; // 26 no of color planes short bpp; // 28 bits/pixel int compression; // 32 BI_RGB = 0 = no compression int sizeraw; // 36 size of raw bmp file, excluding header, incl padding int hres; // 40 horz resolutions pixels/meter int vres; // 44 int npalcolors; // 48 No of colors in palette int nimportant; // 52 No of important colors // raw b, g, r data here, dword aligned per scan line }; int *cl_read_bmp(const char *filename, int *width, int *height) { struct bmphdr hdr; char *bmppath = cl_do_kiss_path(filename, device); FILE *fp = fopen(bmppath, "rb"); assert(fp); char magic[2]; int ret; ret = fread(&magic[0], 1, 2, fp); assert(2 == ret); assert(magic[0] == 'B' && magic[1] == 'M'); ret = fread(&hdr, sizeof(hdr), 1, fp); assert(1 == ret); assert(hdr.width > 0 && hdr.height > 0 && hdr.nplanes == 1 && hdr.compression == 0); int *rgb32 = (int *) malloc(hdr.width * hdr.height * sizeof(int)); assert(rgb32); int x, y; int *dst = rgb32; for (y = 0; y < hdr.height; y++) { for (x = 0; x < hdr.width; x++) { assert(!feof(fp)); int b = (getc(fp) & 0x0ff); int g = (getc(fp) & 0x0ff); int r = (getc(fp) & 0x0ff); *dst++ = (r | (g << 8) | (b << 16) | 0xff000000); /* abgr */ } while (x & 3) { getc(fp); x++; } // each scanline padded to dword // printf("read row %d\n", y); // fflush(stdout); } fclose(fp); *width = hdr.width; *height = hdr.height; 
free(bmppath); return rgb32; } void cl_write_bmp(const int *data, int width, int height, const char *filename) { int x, y; FILE *fp = fopen(filename, "wb"); assert(fp); char *raw = (char *) malloc(width * height * sizeof(int)); // at most assert(raw); char *p = raw; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { int c = *data++; *p++ = ((c >> 16) & 0xff); *p++ = ((c >> 8) & 0xff); *p++ = ((c >> 0) & 0xff); } while (x & 3) { *p++ = 0; x++; } // pad to dword } int sizeraw = p - raw; int scanline = (width * 3 + 3) & ~3; assert(sizeraw == scanline * height); struct bmphdr hdr; hdr.filesize = scanline * height + sizeof(hdr) + 2; hdr.as0 = 0; hdr.as1 = 0; hdr.bmpoffset = sizeof(hdr) + 2; hdr.headerbytes = 40; hdr.width = width; hdr.height = height; hdr.nplanes = 1; hdr.bpp = 24; hdr.compression = 0; hdr.sizeraw = sizeraw; hdr.hres = 0; // 2834; hdr.vres = 0; // 2834; hdr.npalcolors = 0; hdr.nimportant = 0; /* Now write bmp file */ char magic[2] = { 'B', 'M' }; fwrite(&magic[0], 1, 2, fp); fwrite(&hdr, 1, sizeof(hdr), fp); fwrite(raw, 1, hdr.sizeraw, fp); fclose(fp); free(raw); } static const float pixel_threshold = 0.05f; static const float max_error_ratio = 0.001f; int cl_check_image(const int *img, int w, int h, const char *bmp) { int refw, refh; int *ref = cl_read_bmp(bmp, &refw, &refh); if (ref == NULL || refw != w || refh != h) return 0; const int n = w*h; int discrepancy = 0; for (int i = 0; i < n; ++i) { const float r = (float) (img[i] & 0xff); const float g = (float) ((img[i] >> 8) & 0xff); const float b = (float) ((img[i] >> 16) & 0xff); const float rr = (float) (ref[i] & 0xff); const float rg = (float) ((ref[i] >> 8) & 0xff); const float rb = (float) ((ref[i] >> 16) & 0xff); const float dr = fabs(r-rr) / (1.f/255.f + std::max(r,rr)); const float dg = fabs(g-rg) / (1.f/255.f + std::max(g,rg)); const float db = fabs(b-rb) / (1.f/255.f + std::max(b,rb)); const float err = sqrtf(dr*dr+dg*dg+db*db); if (err > pixel_threshold) discrepancy++; } 
free(ref); return (float(discrepancy) / float(n) > max_error_ratio) ? 0 : 1; } Release_v0.3/utests/utest_helper.hpp000066400000000000000000000151171223142177000177530ustar00rootroot00000000000000/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file utest_helper.hpp * * \author Benjamin Segovia */ #ifndef __UTEST_HELPER_HPP__ #define __UTEST_HELPER_HPP__ #include "CL/cl.h" #include "CL/cl_intel.h" #include "utest.hpp" #include "utest_assert.hpp" #include "utest_error.h" #include #include #include #ifdef HAS_EGL #define EGL_WINDOW_WIDTH 256 #define EGL_WINDOW_HEIGHT 256 #include #include #include #include extern EGLDisplay eglDisplay; extern EGLContext eglContext; extern EGLSurface eglSurface; #endif #define OCL_THROW_ERROR(FN, STATUS) \ do { \ char msg[2048]; \ sprintf(msg, "error calling %s with error %s \n", #FN, err_msg[-STATUS]); \ OCL_ASSERTM(false, msg); \ } while (0) #define OCL_CALL(FN, ...) 
\ do { \ int status = FN(__VA_ARGS__); \ if (status != CL_SUCCESS) OCL_THROW_ERROR(FN, status); \ } while (0) #define OCL_CREATE_KERNEL(NAME) \ do { \ OCL_CALL (cl_kernel_init, NAME".cl", NAME, SOURCE, NULL); \ } while (0) #define OCL_CREATE_KERNEL_FROM_FILE(FILE_NAME, KERNEL_NAME) \ do { \ OCL_CALL(cl_kernel_init, FILE_NAME".cl", KERNEL_NAME, SOURCE, NULL); \ } while (0) #define OCL_FLUSH() \ do { \ OCL_CALL(clFlush, queue); \ } while(0) #define OCL_FINISH() \ do { \ OCL_CALL(clFinish, queue); \ } while(0) #define OCL_CALL2(FN, RET, ...) \ do { \ cl_int status; \ RET = FN(__VA_ARGS__, &status);\ if (status != CL_SUCCESS) OCL_THROW_ERROR(FN, status); \ } while (0) #define OCL_CREATE_BUFFER(BUFFER, FLAGS, SIZE, DATA) \ OCL_CALL2(clCreateBuffer, BUFFER, ctx, FLAGS, SIZE, DATA) #define OCL_CREATE_USER_EVENT(EVENT) \ OCL_CALL2(clCreateUserEvent, EVENT, ctx) #define OCL_SET_USER_EVENT_STATUS(EVENT, STATUS) \ OCL_CALL(clSetUserEventStatus, EVENT, STATUS) #define OCL_CREATE_IMAGE(IMAGE, FLAGS, FORMAT, DESC, DATA) \ OCL_CALL2(clCreateImage, IMAGE, ctx, FLAGS, FORMAT, DESC, DATA) #define OCL_CREATE_IMAGE2D(IMAGE, FLAGS, FORMAT, WIDTH, HEIGHT, PITCH, DATA) \ OCL_CALL2(clCreateImage2D, IMAGE, ctx, FLAGS, FORMAT, WIDTH, HEIGHT, PITCH, DATA) #define OCL_CREATE_IMAGE3D(IMAGE, FLAGS, FORMAT, WIDTH, HEIGHT, DEPTH, RPITCH, SPITCH, DATA) \ OCL_CALL2(clCreateImage3D, IMAGE, ctx, FLAGS, FORMAT, WIDTH, HEIGHT, DEPTH, RPITCH, SPITCH, DATA) #define OCL_READ_IMAGE(IMAGE, ORIGIN, REGION, DATA) \ OCL_CALL(clEnqueueReadImage, queue, IMAGE, CL_TRUE, ORIGIN, REGION, 0, 0, DATA, 0, NULL, NULL) #define OCL_WRITE_IMAGE(IMAGE, ORIGIN, REGION, DATA) \ OCL_CALL(clEnqueueWriteImage, queue, IMAGE, CL_TRUE, ORIGIN, REGION, 0, 0, DATA, 0, NULL, NULL) #define OCL_CREATE_GL_IMAGE(IMAGE, FLAGS, TARGET, LEVEL, TEXTURE) \ OCL_CALL2(clCreateFromGLTexture, IMAGE, ctx, FLAGS, TARGET, LEVEL, TEXTURE) #define OCL_CREATE_GL_IMAGE2D(IMAGE, FLAGS, TARGET, LEVEL, TEXTURE) \ OCL_CALL2(clCreateFromGLTexture2D, IMAGE, 
ctx, FLAGS, TARGET, LEVEL, TEXTURE) #define OCL_CREATE_GL_IMAGE3D(IMAGE, FLAGS, TARGET, LEVEL, TEXTURE) \ OCL_CALL2(clCreateFromGLTexture3D, IMAGE, ctx, FLAGS, TARGET, LEVEL, TEXTURE) #define OCL_ENQUEUE_ACQUIRE_GL_OBJECTS(ID) \ OCL_CALL(clEnqueueAcquireGLObjects, queue, 1, &buf[ID], 0, 0, 0) #define OCL_SWAP_EGL_BUFFERS() \ eglSwapBuffers(eglDisplay, eglSurface); #define OCL_CREATE_SAMPLER(SAMPLER, ADDRESS_MODE, FILTER_MODE) \ OCL_CALL2(clCreateSampler, SAMPLER, ctx, 0, ADDRESS_MODE, FILTER_MODE) #define OCL_MAP_BUFFER(ID) \ OCL_CALL2(clMapBufferIntel, buf_data[ID], buf[ID]) #define OCL_UNMAP_BUFFER(ID) \ do { \ if (buf[ID] != NULL) { \ OCL_CALL (clUnmapBufferIntel, buf[ID]); \ buf_data[ID] = NULL; \ } \ } while (0) #define OCL_MAP_BUFFER_GTT(ID) \ OCL_CALL2(clMapBufferGTTIntel, buf_data[ID], buf[ID]) #define OCL_UNMAP_BUFFER_GTT(ID) \ do { \ if (buf[ID] != NULL) { \ OCL_CALL (clUnmapBufferGTTIntel, buf[ID]); \ buf_data[ID] = NULL; \ } \ } while (0) #define OCL_NDRANGE(DIM_N) \ OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, DIM_N, NULL, globals, locals, 0, NULL, NULL) #define OCL_SET_ARG(ID, SIZE, ARG) \ OCL_CALL (clSetKernelArg, kernel, ID, SIZE, ARG) #define OCL_CHECK_IMAGE(DATA, W, H, FILENAME) \ if (cl_check_image(DATA, W, H, FILENAME) == 0) \ OCL_ASSERTM(false, "image mismatch") enum { MAX_BUFFER_N = 16 }; extern cl_platform_id platform; extern cl_device_id device; extern cl_context ctx; extern cl_program program; extern cl_kernel kernel; extern cl_command_queue queue; extern cl_mem buf[MAX_BUFFER_N]; extern void* buf_data[MAX_BUFFER_N]; extern size_t globals[3]; extern size_t locals[3]; enum { SOURCE = 0, LLVM = 1, BIN = 2 }; /* Init OpenCL */ extern int cl_ocl_init(void); /* Init program and kernel for the test */ extern int cl_kernel_init(const char *file_name, const char *kernel_name, int format, const char * build_opt); /* Get the file path */ extern char* cl_do_kiss_path(const char *file, cl_device_id device); /* init the bunch of global varaibles 
here */
/* Runs cl_ocl_init() and then cl_kernel_init() with no build options;
 * returns the first status that is not CL_SUCCESS, or CL_SUCCESS */
extern int cl_test_init(const char *file_name, const char *kernel_name, int format);

/* Unmap and release all the created buffers */
extern void cl_buffer_destroy(void);

/* Release the OCL queue and context (and the EGL window when one was
 * created); the device is not released here */
extern void cl_ocl_destroy(void);

/* Release kernel and program */
extern void cl_kernel_destroy(void);

/* Release everything allocated in cl_test_init */
extern void cl_test_destroy(void);

/* Nicely output the performance counters */
extern void cl_report_perf_counters(cl_mem perf);

/* Read a bmp from file; the returned pixel array is malloc'ed and must be
 * freed by the caller */
extern int *cl_read_bmp(const char *filename, int *width, int *height);

/* Write a bmp to a file */
extern void cl_write_bmp(const int *data, int width, int height, const char *filename);

/* Check data from img against bmp file located at "bmp"; returns 1 when the
 * images match and 0 otherwise */
extern int cl_check_image(const int *img, int w, int h, const char *bmp);

#endif /* __UTEST_HELPER_HPP__ */

Release_v0.3/utests/utest_run.cpp000066400000000000000000000053321223142177000172710ustar00rootroot00000000000000/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

/**
 * \file utest_run.cpp
 * \author Benjamin Segovia
 *
 * Just run the unit tests.
The user can possibly provides the subset of it */ #include "utest_helper.hpp" #include "utest_exception.hpp" #include #include static const char *shortopts = "c:lanh"; struct option longopts[] = { {"casename", required_argument, NULL, 'c'}, {"list", no_argument, NULL, 'l'}, {"all", no_argument, NULL, 'a'}, {"allnoissue", no_argument, NULL, 'n'}, {"help", no_argument, NULL, 'h'}, {0, 0, 0, 0}, }; void usage() { std::cout << "\ Usage:\n\ ./utest_run